import sys # we require code from other folders
import pandas as pd
import numpy as np
import itertools
import pickle
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'DeepMoA: method to predict the mechanism of action of cancer drugs
Select data and import libraries
import seaborn as sns
import matplotlib.pyplot as plt
CB_color_cycle = ['#EECC16', '#62BB35', '#FDAE33','#208EA3', '#EA4E9D', '#984ea3','#999999', '#e41a1c', '#dede00']
#sns.set_style("darkgrid")import matplotlib.font_manager as fm
font_files = fm.findSystemFonts()
plt.rcdefaults()
# Go through and add each to Matplotlib's font cache.
for font_file in font_files:
fm.fontManager.addfont(font_file)
plt.rc('font', family='Roboto')plt.rc('font', family='Roboto')
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Roboto'#%config InlineBackend.figure_format='retina'# pytorch relates imports
import torch
import torch.nn as nn
import torch.optim as optim
# imports from captum library
from captum.attr import LayerDeepLift# for combobox
from __future__ import print_function
from ipywidgets import interact, interactive, fixed, interact_manual
import ipywidgets as widgetsimport plotly.express as px
import plotly.graph_objects as go
import plotly.io as piopd.options.display.min_rows = 20000
pd.set_option('max_colwidth', 200)pd.options.display.max_rows = 20000pd.set_option('min_rows', 20000)mac = "/Users/katyna/Library/CloudStorage/OneDrive-Tecnun/"
windows = "C:/Users/ksada/OneDrive - Tecnun/"
computer = windows # CHANGEsys.path.append(computer + "SparseGO_code/code")
import util
from util import *%matplotlib inline#%matplotlib inline
# To make histograms
def histogram(dataframe, color, title, ylabel, n_bins, threshold=0.05):
    """Draw a histogram of p-values on the current axes and highlight the low bins.

    Parameters
    ----------
    dataframe : values passed straight to ``plt.hist``.
    color : face colour of the regular bars.
    title, ylabel : title and y-axis label of the plot.
    n_bins : forwarded to the ``bins`` argument of ``plt.hist``.
    threshold : bars whose left edge is below this value are recoloured with
        ``CB_color_cycle[2]``. Defaults to 0.05, the value that was hard-coded.
    """
    _, bins, patches = plt.hist(dataframe, color=color, bins=n_bins, linewidth=0.1)
    # bins holds one more edge than there are bars, so pair each bar with its left edge.
    for left_edge, patch in zip(bins[:-1], patches):
        if left_edge < threshold:
            patch.set_facecolor(CB_color_cycle[2])
    plt.xlabel("P-value", fontsize=16)
    plt.ylabel(ylabel, fontsize=16)
    plt.title(title, fontsize=16)
    plt.xticks(fontsize=14)
    plt.yticks(fontsize=14)
    # Style the axes that were just drawn on; plt.subplot(111) could create or
    # replace an axes when the caller was plotting into a different subplot layout.
    ax = plt.gca()
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)


inputdir = computer+"SparseGO_code/data/cross_validation_expression/allsamples/" # CHANGE
# Folders (dir1: variable-importance code, dir2: results of the trained model)
dir1=computer+"Tesis/Codigo/VariableImportance/"
dir2=computer+"SparseGO_code/results/weights&biases/Expression_MSE_all/" # CHANGE
# These two assignments used to be fused into one chained assignment
# (`resultsdir=dir2gene2id = ...`), which pointed resultsdir at the gene file.
resultsdir=dir2

# Input files of the model
gene2id = inputdir+"gene2ind.txt"
cell2id=inputdir+"cell2ind.txt"
drug2id=inputdir+"drug2ind.txt"
drug2fingerprint=inputdir+"drug2fingerprint.txt"
load=resultsdir+"last_model.pt"
onto = inputdir+"ontology.txt" # CHANGE
genotype=inputdir+"cell2expression.txt" # CHANGE
num_neurons_per_GO = 6 # CHANGE

# DeepLIFT
gene2id_mapping = load_mapping(gene2id)
dG, terms_pairs, genes_terms_pairs = load_ontology(onto, gene2id_mapping)
sorted_pairs, level_list, level_number = sort_pairs(genes_terms_pairs, terms_pairs, dG, gene2id_mapping)
layer_connections = pairs_in_layers(sorted_pairs, level_list, level_number)
cell_features = np.genfromtxt(genotype, delimiter=',')
drug_features = np.genfromtxt(drug2fingerprint, delimiter=',')
drug2id_mapping = load_mapping(drug2id)
cell2id_mapping = load_mapping(cell2id)
num_genes = len(gene2id_mapping)
drug_dim = len(drug_features[0,:])There are 15015 genes
There are 1 roots: GO:0008150
There are 4184 terms
There are 1 connected components
model = torch.load(load, map_location='cuda:%d' % 0)modelsparseGO_nn(
(genes_terms_sparse_linear_1): SparseLinearNew(
in_features=15015, out_features=25104, bias=True, sparsity=0.0030196221878822263, connectivity=tensor([[ 0, 1, 2, ..., 23721, 23722, 23723],
[ 0, 0, 0, ..., 15014, 15014, 15014]], device='cuda:0'), small_world=False
)
(genes_terms_tanh): Tanh()
(genes_terms_batchnorm): BatchNorm1d(25104, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_1): SparseLinearNew(
in_features=25104, out_features=8304, bias=True, sparsity=0.002372788160788691, connectivity=tensor([[ 966, 967, 968, ..., 7047, 7048, 7049],
[ 0, 0, 0, ..., 25103, 25103, 25103]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_1): Tanh()
(GO_terms_batchnorm_1): BatchNorm1d(8304, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_2): SparseLinearNew(
in_features=8304, out_features=3684, bias=True, sparsity=0.003911619061964564, connectivity=tensor([[ 0, 1, 2, ..., 3681, 3682, 3683],
[ 0, 0, 0, ..., 8303, 8303, 8303]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_2): Tanh()
(GO_terms_batchnorm_2): BatchNorm1d(3684, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_3): SparseLinearNew(
in_features=3684, out_features=1650, bias=True, sparsity=0.007924193070772875, connectivity=tensor([[ 150, 151, 152, ..., 1641, 1642, 1643],
[ 0, 0, 0, ..., 3683, 3683, 3683]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_3): Tanh()
(GO_terms_batchnorm_3): BatchNorm1d(1650, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_4): SparseLinearNew(
in_features=1650, out_features=726, bias=True, sparsity=0.015807663410969196, connectivity=tensor([[ 474, 475, 476, ..., 711, 712, 713],
[ 0, 0, 0, ..., 1649, 1649, 1649]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_4): Tanh()
(GO_terms_batchnorm_4): BatchNorm1d(726, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_5): SparseLinearNew(
in_features=726, out_features=318, bias=True, sparsity=0.03305785123966942, connectivity=tensor([[ 60, 61, 62, ..., 105, 106, 107],
[ 0, 0, 0, ..., 725, 725, 725]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_5): Tanh()
(GO_terms_batchnorm_5): BatchNorm1d(318, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_6): SparseLinearNew(
in_features=318, out_features=120, bias=True, sparsity=0.06981132075471698, connectivity=tensor([[ 0, 1, 2, ..., 93, 94, 95],
[ 0, 0, 0, ..., 317, 317, 317]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_6): Tanh()
(GO_terms_batchnorm_6): BatchNorm1d(120, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_7): SparseLinearNew(
in_features=120, out_features=42, bias=True, sparsity=0.2, connectivity=tensor([[ 18, 19, 20, ..., 21, 22, 23],
[ 0, 0, 0, ..., 119, 119, 119]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_7): Tanh()
(GO_terms_batchnorm_7): BatchNorm1d(42, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(GO_terms_sparse_linear_8): SparseLinearNew(
in_features=42, out_features=30, bias=True, sparsity=1.0, connectivity=tensor([[ 0, 1, 2, ..., 27, 28, 29],
[ 0, 0, 0, ..., 41, 41, 41]], device='cuda:0'), small_world=False
)
(GO_terms_tanh_8): Tanh()
(GO_terms_batchnorm_8): BatchNorm1d(30, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drug_linear_layer_1): Linear(in_features=2048, out_features=200, bias=True)
(drug_tanh_1): Tanh()
(drug_batchnorm_layer_1): BatchNorm1d(200, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drug_linear_layer_2): Linear(in_features=200, out_features=100, bias=True)
(drug_tanh_2): Tanh()
(drug_batchnorm_layer_2): BatchNorm1d(100, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(drug_linear_layer_3): Linear(in_features=100, out_features=50, bias=True)
(drug_tanh_3): Tanh()
(drug_batchnorm_layer_3): BatchNorm1d(50, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(final_linear_layer): Linear(in_features=80, out_features=40, bias=True)
(final_tanh): Tanh()
(final_batchnorm_layer): BatchNorm1d(40, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(final_aux_linear_layer): Linear(in_features=40, out_features=1, bias=True)
(final_aux_tanh): Tanh()
(final_linear_layer_output): Linear(in_features=1, out_features=1, bias=True)
)
# Save layers to be analyzed: the gene -> term layer followed by the sparse
# GO-term layers 1 to 7 (GO_terms_sparse_linear_8 is not part of the list).
model_layers = [model.genes_terms_sparse_linear_1]
model_layers.extend(
    getattr(model, "GO_terms_sparse_linear_%d" % layer_index)
    for layer_index in range(1, 8)
)

# GO terms info
# Go term names
gene_ontology = pd.read_excel('all_go_terms_info.xlsx')

# Get all layers’ GO term with the neuron number
# Build the id -> name table once instead of scanning the whole sheet for every term.
# The name is taken from the second column; on repeated ids the first row wins,
# which is what the previous `.to_numpy()[0,1]` lookup returned.
go_id_to_name = {}
for go_id, go_name in zip(gene_ontology['id'], gene_ontology.iloc[:, 1]):
    go_id_to_name.setdefault(go_id, go_name)

all_terms_ids = {}
all_terms_names = {}
all_layers_non_virtual = {} # store only terms that are part of the layer (remove virtual), those are the important attributions
all_layers_non_virtual_names = {}
num_neurons_per_GO = 6
neuron_numbers = range(1, num_neurons_per_GO+1)
for layer_number in range(len(layer_connections)-1):
    layer_pairs = layer_connections[layer_number]
    terms_ids = []
    names = []
    output_id = create_index(layer_pairs[:,0]) # first 6 neurons correspond to the term with key 0
    for term in output_id.keys():
        name = go_id_to_name[term].capitalize()
        # labels GO:0000038_1, GO:0000038_2 ... GO:0000038_6, later concatenated with the attributions
        for i in neuron_numbers:
            terms_ids.append(term+"_"+str(i))
            names.append(name+" ("+str(i)+")")
    all_terms_ids[layer_number] = np.array(terms_ids)
    all_terms_names[layer_number] = np.array(names)

    non_virtual = [] # store the terms part of that layer
    non_virtual_names = []
    for term in level_list[layer_number+1]:
        nv_name = go_id_to_name[term].capitalize()
        # use the same neuron count as above (this loop used a hard-coded range(1,7))
        for i in neuron_numbers:
            non_virtual.append(term+"_"+str(i))
            non_virtual_names.append(nv_name+" ("+str(i)+")")
    all_layers_non_virtual[layer_number] = non_virtual
    all_layers_non_virtual_names[layer_number] = non_virtual_names

# All GO terms part of a layer (non-virtual) with their corresponding name and layer number…
real_go_info = pd.DataFrame({"GO_term":[],"Name":[],"layer_number":[]})
for layer_number in range(len(layer_connections)-1):
layer_go_info = pd.DataFrame({"GO_term":all_layers_non_virtual[layer_number],"Name":all_layers_non_virtual_names[layer_number],"layer_number":(layer_number)})
real_go_info = pd.concat((real_go_info,layer_go_info))
real_go_info.head()| GO_term | Name | layer_number | |
|---|---|---|---|
| 0 | GO:0000019_1 | Regulation of mitotic recombination (1) | 0.0 |
| 1 | GO:0000019_2 | Regulation of mitotic recombination (2) | 0.0 |
| 2 | GO:0000019_3 | Regulation of mitotic recombination (3) | 0.0 |
| 3 | GO:0000019_4 | Regulation of mitotic recombination (4) | 0.0 |
| 4 | GO:0000019_5 | Regulation of mitotic recombination (5) | 0.0 |
Drugs info
def get_compound_names(file_name):
    """Return fields 1 and 2 (zero-based) of every line of a tab-separated file.

    Each line contributes one two-element list. The header line is included,
    so the caller is expected to drop it.
    """
    with open(file_name, 'r') as fi:
        split_lines = (raw_line.strip().split('\t') for raw_line in fi)
        return [[fields[1], fields[2]] for fields in split_lines]


drugs = get_compound_names(inputdir+"compound_names.txt")
drugs.pop(0)  # remove the header row
# Output: ['SMILE', 'Name']
DeepLIFT for VNN
Reference activation… (baseline)
median_cell_features = np.median(cell_features,axis=0) # to use as a reference
# NOTE: despite its name this is not a median; it is read from the glucose fingerprint file.
median_drug_features = np.genfromtxt(computer+"SparseGO_code/data/glucose_fingerprint.txt", delimiter=',')

# Attribution function: sum
def get_layer_attribution(layer_number,input_data,baseline,selected_drug_data):
    """Return the summed DeepLIFT attributions of one layer for one drug.

    Parameters
    ----------
    layer_number : index into ``model_layers`` / ``all_terms_ids`` of the layer to study.
    input_data : model input for the selected drug.
    baseline : reference input passed to DeepLIFT.
    selected_drug_data : sequence whose element 1 is used as the name of the
        attribution column.

    Returns
    -------
    pandas.DataFrame with a ``GO_term`` column and one attribution column,
    restricted to the labels listed in ``all_layers_non_virtual[layer_number]``.
    """
    drug_name = selected_drug_data[1]
    dl = LayerDeepLift(model, model_layers[layer_number], multiply_by_inputs=True) # CHOOSE LAYER TO STUDY
    dl_attr_test = dl.attribute(input_data, baseline)
    # add up the attributions over the first axis (one row per input sample)
    dl_attr_test_sum = dl_attr_test.cpu().detach().numpy().sum(0)
    # Build the two columns directly. Stacking the ids and the numbers with
    # np.column_stack turned the attributions into strings that then had to be
    # parsed back into numbers.
    attribution_data = pd.DataFrame({"GO_term": all_terms_ids[layer_number]})
    attribution_data[drug_name] = np.asarray(dl_attr_test_sum, dtype=np.float64).round(10)
    # only keep the non virtual terms
    is_real_term = attribution_data['GO_term'].isin(all_layers_non_virtual[layer_number])
    return attribution_data.loc[is_real_term]


# DeepLIFT for all drugs
attribution_data_all = pd.DataFrame()
# Obtain the top GO terms on all layers for each drug
for selected_drug_data in drugs:
selected_drug =selected_drug_data[0] # DRUG smile
selected_drug_features = []
drug_specific_features=drug_features[drug2id_mapping[selected_drug]] # features of drug
for i in range(len(cell2id_mapping)): # make all combinations of selected_drug and cell types
selected_drug_features.append(np.concatenate((cell_features[i], drug_specific_features), axis=None))
selected_drug_features = torch.FloatTensor(np.array(selected_drug_features))
# Data for deeplift...
input_data = torch.autograd.Variable(selected_drug_features.cuda(0))
#median_drug_features = drug_specific_features
# baseline is the median of the expression data and drug features
baseline = torch.FloatTensor(np.concatenate((median_cell_features, median_drug_features), axis=None))
baseline = torch.reshape(baseline, (1, baseline.size()[0]))
baseline = torch.autograd.Variable(baseline.cuda(0))
attribution_data_drug = list(map(get_layer_attribution,range(0,len(model_layers)),itertools.repeat(input_data, len(model_layers)),itertools.repeat(baseline, len(model_layers)),itertools.repeat(selected_drug_data, len(model_layers)))) # get the attribution for each layer (map is similar to apply)
attribution_data_drug = pd.concat(attribution_data_drug) # concatenate attribution of all layers
attribution_data_all = pd.concat([attribution_data_all,attribution_data_drug.iloc[:,1]], axis=1)
print(selected_drug_data[1])
attribution_data_all = pd.concat([attribution_data_drug.iloc[:,0],attribution_data_all], axis=1)attribution_data_all = attribution_data_all.set_index("GO_term")attribution_data_all.head()| BRD-K02251932-001-01-3 | BRD-K25737009-001-01-2 | Nintedanib | bicalutamide | N-[(2R,3S)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2R)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2H-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide | PHA-665752 | N-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1H-indazol-6-yl]benzamide | Ki8751 | IPA-3 | FAWUGYGEBHAQBU-PPEXNQRJSA-N | ... | ML031 | Semagacestat | RITA | CDK9 inhibitor | Dasatinib | BMS-536924;CC1=CC(=CC2=C1NC(=C3C(=CC=NC3=O)NC[C@H](C4=CC(=CC=C4)Cl)O)N2)N5CCOCC5 | SCHEMBL13741284 | Daporinad | STF-31 | Narciclasine | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GO_term | |||||||||||||||||||||
| GO:0000012_1 | -0.006564 | -0.005680 | 0.003188 | -0.005863 | -0.003410 | -0.002951 | 0.001118 | 0.002033 | 0.000799 | -0.007842 | ... | -0.007256 | -0.004271 | -0.013783 | -0.006253 | 0.002864 | 0.009604 | -0.008099 | -0.001475 | -0.003698 | -0.009866 |
| GO:0000012_2 | 0.010029 | 0.011514 | 0.009892 | 0.012072 | 0.005788 | 0.012909 | 0.002316 | 0.009362 | -0.011816 | 0.000166 | ... | 0.008918 | -0.002449 | 0.017704 | 0.006732 | 0.002447 | 0.006485 | 0.003888 | -0.000569 | 0.001628 | 0.017132 |
| GO:0000012_3 | 0.008466 | 0.006840 | -0.000027 | 0.006379 | 0.003082 | -0.006110 | -0.008877 | -0.000347 | -0.013084 | 0.000150 | ... | -0.006096 | 0.011308 | 0.012216 | 0.000997 | 0.011521 | 0.013800 | 0.002843 | 0.016328 | 0.021640 | 0.003536 |
| GO:0000012_4 | 0.013018 | 0.007276 | 0.010128 | 0.008622 | 0.004795 | 0.006706 | 0.000874 | 0.005514 | -0.003347 | -0.000010 | ... | -0.003682 | 0.006544 | 0.010806 | 0.003346 | 0.017556 | 0.023130 | 0.001105 | 0.009710 | 0.016940 | 0.014787 |
| GO:0000012_5 | -0.007076 | -0.006129 | -0.007634 | -0.003785 | -0.004151 | -0.007947 | -0.008430 | -0.006039 | -0.002722 | 0.002163 | ... | 0.001821 | -0.002346 | -0.007831 | -0.009368 | -0.011118 | -0.003408 | -0.001760 | 0.003593 | -0.000109 | -0.020831 |
5 rows × 684 columns
ChEMBL Drug Target Slim
from chembl_webresource_client.new_client import new_client

# Import SparseGO drugs
# Get names
def get_compound_names(file_name):
    """Return the lower-cased field 2 (zero-based) of every line of a tab-separated file.

    NOTE: this replaces the function of the same name defined earlier in the
    file, which returned two fields per line instead of one lower-cased name.
    The header line is included in the result.
    """
    with open(file_name, 'r') as fi:
        return [raw_line.strip().split('\t')[2].lower() for raw_line in fi]


names = get_compound_names(computer+"SparseGO_code/data/compound_names.txt")
names.pop(0)  # remove the header row

# ChEMBL IDs
Get the ChEMBL IDs of the drugs, if available (the list always contains the same 684 drugs, so the `compounds2ids` object can be reused).
# Get all chembl IDs -- this takes a while (at least one web query per drug)
molecule = new_client.molecule


def _first_chembl_id(**criteria):
    """Return the ``molecule_chembl_id`` of the first molecule matching ``criteria``, or None."""
    hits = list(molecule.filter(**criteria).only('molecule_chembl_id'))
    return hits[0]['molecule_chembl_id'] if hits else None


def _lookup_single_drug(drug_name):
    """Resolve one drug name, trying each lookup in order and stopping at the first hit."""
    lookups = (
        {"pref_name__iexact": drug_name},
        # for drugs that have the chembl ID as the name!!
        {"chembl_id": drug_name},
        # in case it is not found by pref_name
        {"molecule_synonyms__molecule_synonym__iexact": drug_name},
    )
    for criteria in lookups:
        chembl_id = _first_chembl_id(**criteria)
        if chembl_id is not None:
            return chembl_id
    return None


compounds2ids = {}
for i,drug in enumerate(names):
    if " + " in drug:
        # Drug combination: each component is looked up by preferred name only.
        drug_split = drug.split(" + ", 1)
        ID1 = _first_chembl_id(pref_name__iexact=drug_split[0])
        ID2 = _first_chembl_id(pref_name__iexact=drug_split[1])
        if ID1 is not None and ID2 is not None:
            compounds2ids[drug] = [ID1, ID2]
        elif ID1 is not None:
            compounds2ids[drug] = ID1
        elif ID2 is not None:
            compounds2ids[drug] = ID2
        else:
            print(drug,i)  # nothing found: report the drug and its position
    else:
        ID = _lookup_single_drug(drug)
        if ID is not None:
            compounds2ids[drug] = ID
        else:
            print(drug,i)  # nothing found: report the drug and its position
# 341 chembl IDs were found (october 31 2022)

#manually add 6 more
compounds2ids["teniposide [usan]"]="CHEMBL452231"
compounds2ids["docetaxel (taxotere)"]="CHEMBL92"
compounds2ids["nan + navitoclax(1)"]="CHEMBL443684"
compounds2ids["nan + navitoclax(2)"]="CHEMBL443684"
compounds2ids["osi-027;coc1=cc=cc2=cc(=c3c4=c(n=cnn4c(=n3)c5ccc(cc5)c(=o)o)n)n=c21"]="CHEMBL3120215"
compounds2ids["paclitaxel;cc1=c2[c@h](c(=o)[c@@]3([c@h](c[c@@h]4[c@]([c@h]3[c@@h]([c@@](c2(c)c)(c[c@@h]1oc(=o)[c@@h]([c@h](c5=cc=cc=c5)nc(=o)c6=cc=cc=c6)o)o)oc(=o)c7=cc=cc=c7)(co4)oc(=o)c)o)c)oc(=o)c"]="CHEMBL428647"

len(compounds2ids)
# Output: 347
chEMBL MoA (targets)
Get the molecule targets of each drug (if available)
compounds2targets = {drug: set() for drug in compounds2ids} # required to store the drug targets
chembl_ids = list(compounds2ids.values()) # Chembl IDs of drugs
for drug in compounds2ids:
    # we jump from compounds to targets through the mechanism records:
    activities = new_client.mechanism.filter(parent_molecule_chembl_id__in=compounds2ids[drug]).only(
        ['parent_molecule_chembl_id', 'target_chembl_id'])
    # extracting target ChEMBL IDs from the records:
    for act in activities:
        compounds2targets[drug].add(act['target_chembl_id'])
    print(drug)
# We now know all targets for some drugs

# Drop missing (None) target ids, then drop the drugs left without any target.
# The same drugs are kept as before, but a None can no longer remain inside a
# kept set and be used as a target id in the next step.
compounds2targets = {k: {t for t in v if t is not None} for k, v in compounds2targets.items()}
compounds2targets = {k: v for k, v in compounds2targets.items() if v}
# 218 DRUGS HAVE ANNOTATED DRUG TARGETS
len(compounds2targets)
# Output: 220
Drug slim GO terms
Get the GO terms of each target
# Get the GO terms of each target
# Every stored row is the cross-reference record of a target component followed
# by the ChEMBL id of the target it came from.
compounds_GOterms = {}
for compound, targets in compounds2targets.items():
    GOterms_list = []
    for target in targets:
        if target is None:
            continue  # a missing target id cannot be looked up
        target_records = list(new_client.target.filter(target_chembl_id=target).only(['target_components']).only(['target_components_xrefs']))
        if not target_records:
            continue  # nothing returned for this target
        # not all targets have annotated go_terms: the component list may be empty
        for component in target_records[0]['target_components']:
            xref_rows = pd.DataFrame(component['target_component_xrefs']).values.tolist()
            GOterms_list.extend(row + [target] for row in xref_rows) # add target ID to every row
    compounds_GOterms[compound] = pd.DataFrame(GOterms_list).drop_duplicates()
    print(compound)

len(compounds_GOterms)
# Output: 220
# we have 206 annotated drugs on CHEMBL

# add GO terms found in CTRPv2
CTRPv2_terms = pd.read_excel('ctrp_goterms_drugs.xlsx') # add GO terms of drugs with or without annotations
for drug in CTRPv2_terms["Drug"].unique():
    drug_terms = CTRPv2_terms.loc[CTRPv2_terms["Drug"]==drug, "Field"]
    # same four-column layout as the ChEMBL rows; column 2 marks the row as a GO process
    new_rows = pd.DataFrame([[term, "", "GoProcess", ""] for term in drug_terms])
    # some drugs had no previous data, no annotations from chembl
    previous_rows = compounds_GOterms.get(drug)
    if previous_rows is None or previous_rows.empty:
        frames = [new_rows]
    else:
        frames = [previous_rows, new_rows]
    # one concat per drug instead of one per term
    compounds_GOterms[drug] = pd.concat(frames).drop_duplicates()
# now we have 233 annotated drugs

# Delete drugs with no GOterms (some targets have no annotated GO terms)
compounds_GOterms = {k: v for k, v in compounds_GOterms.items() if len(v) != 0 }
len(compounds_GOterms)
# Output: 236
Match GO terms
Find all terms that match, terms that are part of both, the sparseGO graph and the drug slim results…
def load_ontology_extra_output(ontology_file, gene2id_mapping):
    """
    Creates the directed graph of the GO terms and stores the connected elements in arrays.

    Each line of ``ontology_file`` is split on whitespace into three elements.
    When the third one is ``'default'`` the first two are joined by an edge in
    the graph; otherwise the line links a term (first element) to a gene
    (second element), and genes missing from ``gene2id_mapping`` are printed
    and skipped.

    The function prints a summary and calls ``sys.exit(1)`` when a term has no
    genes, when there is more than one root, or when the graph has more than
    one connected component.

    Output
    ------
    dG: networkx.classes.digraph.DiGraph
        Directed graph of all terms
    terms_pairs: numpy.ndarray
        Store the connection between a term and a term
    genes_terms_pairs: numpy.ndarray
        Store the connection between a gene and a term
    term_direct_gene_map: dict
        For each term, the set of ids of the genes directly connected to it
    term_size_map: dict
        For each term, the number of genes of the term and of its descendants
    """
    dG = nx.DiGraph() # Directed graph class
    terms_pairs = [] # store the pairs between a term and a term
    genes_terms_pairs = [] # store the pairs between a gene and a term
    gene_set = set() # all genes seen in the file (elements can't repeat)
    term_direct_gene_map = {}
    term_size_map = {}

    # the context manager closes the file even if a line cannot be parsed
    with open(ontology_file) as file_handle: # file that has genes and go terms
        for line in file_handle:
            line = line.rstrip().split() # split on whitespace; a line has 3 elements
            if line[2] == 'default': # a 'default' line connects two terms in the graph
                dG.add_edge(line[0], line[1]) # Add an edge between line[0] and line[1]
                terms_pairs.append([line[0], line[1]]) # Add the pair to the list
            else:
                if line[1] not in gene2id_mapping: # skip genes that are not in gene2id_mapping
                    print(line[1])
                    continue
                genes_terms_pairs.append([line[0], line[1]]) # add the pair
                if line[0] not in term_direct_gene_map: # first gene of this term: create its set
                    term_direct_gene_map[line[0]] = set()
                term_direct_gene_map[line[0]].add(gene2id_mapping[line[1]]) # add the gene id to the set of the term
                gene_set.add(line[1]) # add the gene to the set of all genes

    terms_pairs = np.array(terms_pairs) # convert to 2d array
    genes_terms_pairs = np.array(genes_terms_pairs) # convert to 2d array
    print('There are', len(gene_set), 'genes')

    for term in dG.nodes(): # repeated for each of the GO terms
        term_gene_set = set()
        if term in term_direct_gene_map:
            term_gene_set = term_direct_gene_map[term] # genes directly connected to the term
        deslist = nxadag.descendants(dG, term) # all the descendant GO terms (the term itself is not included)
        for child in deslist:
            if child in term_direct_gene_map: # add the genes of the descendants
                # union of both sets: the term's own genes plus those of its descendants
                term_gene_set = term_gene_set | term_direct_gene_map[child]
        if len(term_gene_set) == 0:
            print('There is empty terms, please delete term:', term)
            sys.exit(1)
        else:
            # number of genes in the term (taking its descendants into account)
            term_size_map[term] = len(term_gene_set)

    roots = [n for n in dG.nodes if dG.in_degree(n) == 0] # nodes without incoming edges
    uG = dG.to_undirected() # Returns an undirected representation of the digraph
    connected_subG_list = list(nxacc.connected_components(uG)) # one entry per connected component

    # Verify the graph makes sense...
    print('There are', len(roots), 'roots:', roots[0])
    print('There are', len(dG.nodes()), 'terms')
    print('There are', len(connected_subG_list), 'connected components')
    if len(roots) > 1:
        print('There are more than 1 root of ontology. Please use only one root.')
        sys.exit(1)
    if len(connected_subG_list) > 1:
        print( 'There are more than connected components. Please connect them.')
        sys.exit(1)
    return dG, terms_pairs, genes_terms_pairs, term_direct_gene_map, term_size_map


# SparseGO graph
# Import SparseGO graph (to extract all nodes/terms)...
# Load ontology: create the graph of connected GO terms
dG, terms_pairs, genes_terms_pairs, term_direct_gene_map, term_size_map = load_ontology_extra_output(onto, gene2id_mapping)
####
sparseGO_terms = list(dG.nodes())
sparseGO_terms.remove("GO:0008150")There are 15015 genes
There are 1 roots: GO:0008150
There are 4184 terms
There are 1 connected components
Full GO graph
# Import full graph (to find parents)...
import obonet
#import networkx as nx
url = 'http://purl.obolibrary.org/obo/go/go-basic.obo'
full_graph = obonet.read_obo(url)  # downloads and parses the ontology file
full_graph = full_graph.reverse() # change the direction of the edges; after this the roots have no incoming edges
[n for n in full_graph.nodes if full_graph.in_degree(n) == 0] # graph contains the 3 roots (BP,MF,CC)
# Output: ['GO:0003674', 'GO:0005575', 'GO:0008150']
Match terms!
Find all matching terms, i.e. the terms that are part of both the SparseGO graph and the drug slim results. If an ancestor of a slim term is part of the SparseGO graph, it is also added as a match.
# Each model has DIFFERENT matches (the graph is different)
sparseGO_term_set = set(sparseGO_terms)  # constant-time membership tests
compounds_GOterms_matches = {}
for drug, drug_df in compounds_GOterms.items():
    drug_slim_GOterms = set(drug_df.loc[drug_df[2] == "GoProcess"][0]) # only GO processes
    drug_matches = [] # store all directly matched terms and matches with all ancestors
    for term in drug_slim_GOterms: # term ='GO:1902669' is a good example
        # relationship code -- 1: same term, 2: not a direct match (an ancestor of the term)
        if term in sparseGO_term_set: # is the term in the sparseGO terms?
            drug_matches.append([1, term])
        # Are its ancestors in the sparseGO terms? Walk up level by level, starting
        # WITH the direct parents. The previous loop replaced the parents by the
        # grandparents before testing them, so direct parents were never matched.
        relationship = 2
        ancestors = set()
        frontier = {source for source, _ in full_graph.in_edges(term)} # parents of term
        while frontier:
            ancestors |= frontier
            # parents of the current level that have not been visited yet
            frontier = {source for source, _ in full_graph.in_edges(frontier)} - ancestors
        # sorted only to make the order of the stored matches reproducible
        drug_matches.extend([relationship, ancestor] for ancestor in sorted(ancestors & sparseGO_term_set))
    # remove duplicates, keeping the first occurrence of each [relationship, term] pair
    drug_matches = [list(pair) for pair in dict.fromkeys(map(tuple, drug_matches))]
    compounds_GOterms_matches[drug] = drug_matches
    print(drug)

# delete drugs that have no matches
compounds_GOterms_matches = {i:j for i,j in compounds_GOterms_matches.items() if j != []}
len(compounds_GOterms_matches)
# Output: 230
SparseGO terms x drugSlim terms matrix
attribution_data_all.columns = attribution_data_all.columns.str.lower() # in order to match the termattribution_data_all.head()| brd-k02251932-001-01-3 | brd-k25737009-001-01-2 | nintedanib | bicalutamide | n-[(2r,3s)-2-[[cyclopropylmethyl(methyl)amino]methyl]-5-[(2r)-1-hydroxypropan-2-yl]-3-methyl-6-oxo-3,4-dihydro-2h-1,5-benzoxazocin-8-yl]-1-methyl-4-imidazolesulfonamide | pha-665752 | n-cyclopropyl-3-[3-[[cyclopropyl(oxo)methyl]amino]-1h-indazol-6-yl]benzamide | ki8751 | ipa-3 | fawugygebhaqbu-ppexnqrjsa-n | ... | ml031 | semagacestat | rita | cdk9 inhibitor | dasatinib | bms-536924;cc1=cc(=cc2=c1nc(=c3c(=cc=nc3=o)nc[c@h](c4=cc(=cc=c4)cl)o)n2)n5ccocc5 | schembl13741284 | daporinad | stf-31 | narciclasine | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| GO_term | |||||||||||||||||||||
| GO:0000012_1 | -0.006564 | -0.005680 | 0.003188 | -0.005863 | -0.003410 | -0.002951 | 0.001118 | 0.002033 | 0.000799 | -0.007842 | ... | -0.007256 | -0.004271 | -0.013783 | -0.006253 | 0.002864 | 0.009604 | -0.008099 | -0.001475 | -0.003698 | -0.009866 |
| GO:0000012_2 | 0.010029 | 0.011514 | 0.009892 | 0.012072 | 0.005788 | 0.012909 | 0.002316 | 0.009362 | -0.011816 | 0.000166 | ... | 0.008918 | -0.002449 | 0.017704 | 0.006732 | 0.002447 | 0.006485 | 0.003888 | -0.000569 | 0.001628 | 0.017132 |
| GO:0000012_3 | 0.008466 | 0.006840 | -0.000027 | 0.006379 | 0.003082 | -0.006110 | -0.008877 | -0.000347 | -0.013084 | 0.000150 | ... | -0.006096 | 0.011308 | 0.012216 | 0.000997 | 0.011521 | 0.013800 | 0.002843 | 0.016328 | 0.021640 | 0.003536 |
| GO:0000012_4 | 0.013018 | 0.007276 | 0.010128 | 0.008622 | 0.004795 | 0.006706 | 0.000874 | 0.005514 | -0.003347 | -0.000010 | ... | -0.003682 | 0.006544 | 0.010806 | 0.003346 | 0.017556 | 0.023130 | 0.001105 | 0.009710 | 0.016940 | 0.014787 |
| GO:0000012_5 | -0.007076 | -0.006129 | -0.007634 | -0.003785 | -0.004151 | -0.007947 | -0.008430 | -0.006039 | -0.002722 | 0.002163 | ... | 0.001821 | -0.002346 | -0.007831 | -0.009368 | -0.011118 | -0.003408 | -0.001760 | 0.003593 | -0.000109 | -0.020831 |
5 rows × 684 columns
attribution_data_all.shape
# Output: (25098, 684)

# Only keep drugs that have annotated GO terms
attribution_data_annotated = attribution_data_all[list(compounds_GOterms_matches.keys())]
attribution_data_annotated.shape # 230 DRUGS
# Output: (25098, 230)
Build drugSlim (MoA) matrix
# Matrix with the same rows (GO-term neurons) and columns (drugs) as the attribution
# matrix, filled with zeros. It is created directly instead of copying the data and
# zeroing it through `.values`, which relies on `.values` being a writable view.
slim_matrix = pd.DataFrame(
    0.0,
    index=attribution_data_annotated.index,
    columns=attribution_data_annotated.columns,
)
for drug, drug_matches in compounds_GOterms_matches.items():
    matched_terms = {term for _, term in drug_matches}  # element 1 of every match is the term
    # a term is represented by num_neurons_per_GO rows: <term>_1 ... <term>_N
    neuron_labels = [
        term+"_"+str(i)
        for term in matched_terms
        for i in range(1, num_neurons_per_GO+1)
    ]
    # add a 1 if term is annotated to drug; a single .loc call writes to the frame
    # itself, whereas slim_matrix[drug][labels] = 1 may write to a temporary copy
    slim_matrix.loc[neuron_labels, drug] = 1

# SVM
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
from sklearn import svm

# One row per GO term and one column per annotated drug; rows of GO terms that are
# skipped by the modelling loop stay at zero.
# Labels and hard class predictions:
slim_matrix_single_neuron = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)
preds_svm_matrix = pd.DataFrame(0, index=sparseGO_terms, columns=slim_matrix.columns)
# Real-valued scores: start as floats so that writing probabilities, distances and
# logits into them does not have to change the dtype of integer columns.
platt_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)
distance_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)
delta_logits_matrix = pd.DataFrame(0.0, index=sparseGO_terms, columns=slim_matrix.columns)

# Create models
Regression models…
# Cross-validated SVM models: one binary classifier per GO term, evaluated
# with a stratified 4-fold split of the drugs. Out-of-fold predictions are
# written into the result matrices; summary metrics go into the dictionaries.
# Dictionaries to store results (keyed by GO term)
GO_terms_auc_svm = {}
GO_terms_aupr_svm = {}
GO_terms_precision_svm = {}
GO_terms_auc_delta_logits = {}
# Probabilities are clipped to [eps, 1 - eps] before taking logits so that a
# saturated Platt probability (exactly 0 or 1) cannot produce +/-inf, which
# roc_auc_score rejects.
_PROBA_EPS = np.finfo(float).eps
for goterm in sparseGO_terms:
    # if (real_go_info[real_go_info["GO_term"]==goterm+"_1"]["layer_number"]).values >3:
    #     continue
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()
    # More than 8 annotated drugs are required, i.e. at least 2 per fold.
    if sum(goterm_drugs) <= 8:
        continue

    # Out-of-fold results, one entry per cross-validation fold
    all_y_test = []
    all_y_pred_proba = []
    all_y_pred_proba_dis = []
    all_y_pred = []
    all_y_names = []

    # Features: the rows "<goterm>_1" ... "<goterm>_6" of the attribution table
    list_nodes = [goterm+"_"+str(i) for i in range(1,7)]
    score = attribution_data_annotated.loc[list_nodes].T
    #score_mod = score
    # Scale each feature by its standard deviation (this has a large effect
    # on the results); NaNs produced by the division are set to 0.
    score_mod = score.divide(score.std()).fillna(0)

    # Separate drugs in 4 groups for cross-validation -----
    # Split data in 2 groups (stratified, so both halves contain both classes)
    X_part1,X_part2,y_part1,y_part2=train_test_split(score_mod,goterm_drugs,test_size=0.50,random_state=0,stratify=goterm_drugs)
    # Split each half again to obtain the 4 groups
    X_group1,X_group2,y_group1,y_group2=train_test_split(X_part1,y_part1,test_size=0.50,random_state=0,stratify=y_part1)
    X_group3,X_group4,y_group3,y_group4=train_test_split(X_part2,y_part2,test_size=0.50,random_state=0,stratify=y_part2)
    # Keep the folds in lists instead of looking the variables up through
    # globals(), which only works while this code runs at module level.
    X_folds = [X_group1, X_group2, X_group3, X_group4]
    y_folds = [y_group1, y_group2, y_group3, y_group4]

    for test_fold in range(len(X_folds)):
        X_test = X_folds[test_fold]
        y_test = y_folds[test_fold]
        # Use the other 3 groups (in ascending order) for training
        train_folds = [k for k in range(len(X_folds)) if k != test_fold]
        X_train = pd.concat([X_folds[k] for k in train_folds])
        y_train = np.concatenate([y_folds[k] for k in train_folds])

        #gamma = 1/(X_train.shape[1]*X_train.to_numpy().var())
        gamma = "scale"
        C=1
        svm_model = svm.SVC(C=C,gamma=gamma, kernel='rbf',
                            class_weight="balanced",
                            tol=0.001,
                            probability=True,
                            random_state=1234)
        # svm_model = svm.SVC(gamma='auto', kernel='rbf',class_weight="balanced",probability=True)

        # fit the model with data
        svm_model.fit(X_train,y_train)
        y_pred=svm_model.predict(X_test)
        y_pred_proba = svm_model.predict_proba(X_test)[::,1] # Platt-scaled probability of class 1
        # Real-valued decision value of every test sample (its signed
        # distance-like score relative to the separating hyperplane).
        y_pred_proba_dis = svm_model.decision_function(X_test)

        all_y_test.append(y_test)
        all_y_pred_proba.append(y_pred_proba)
        all_y_pred_proba_dis.append(y_pred_proba_dis)
        all_y_pred.append(y_pred)
        all_y_names.append(X_test.index)

    all_y_test = np.concatenate(all_y_test)
    all_y_pred_proba = np.concatenate(all_y_pred_proba)
    all_y_pred_proba_dis = np.concatenate(all_y_pred_proba_dis)
    all_y_names = np.concatenate(all_y_names)
    all_y_pred = np.concatenate(all_y_pred)

    # Delta logits: posterior log-odds (from the Platt probability) minus the
    # prior log-odds (from the fraction of annotated drugs).
    percentage_go_annotations = sum(all_y_test)/len(all_y_test)
    logits_apriori=np.log(percentage_go_annotations/(1-percentage_go_annotations))
    clipped_proba = np.clip(all_y_pred_proba, _PROBA_EPS, 1-_PROBA_EPS)
    logits_apost= np.log(clipped_proba/(1-clipped_proba))
    delta_logits = logits_apost-logits_apriori

    platt_matrix.loc[goterm,all_y_names] = all_y_pred_proba
    distance_matrix.loc[goterm,all_y_names] = all_y_pred_proba_dis
    slim_matrix_single_neuron.loc[goterm,all_y_names] = all_y_test
    preds_svm_matrix.loc[goterm,all_y_names] = all_y_pred
    delta_logits_matrix.loc[goterm,all_y_names] = delta_logits

    GO_terms_auc_delta_logits[goterm] = metrics.roc_auc_score(all_y_test, delta_logits)
    GO_terms_auc_svm[goterm] = metrics.roc_auc_score(all_y_test, all_y_pred_proba)
    precision, recall, thresholds = metrics.precision_recall_curve(all_y_test, all_y_pred_proba)
    GO_terms_aupr_svm[goterm] = metrics.auc(recall, precision)
    # Precision is computed from the hard labels returned by predict()
    GO_terms_precision_svm[goterm] = metrics.precision_score(all_y_test, all_y_pred)

GO_terms_auc_svm_df = pd.DataFrame(list(GO_terms_auc_svm.items()),columns = ['goterm','auc']).set_index("goterm")
GO_terms_auc_svm_df = GO_terms_auc_svm_df.dropna()
GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False).head()| auc | |
|---|---|
| goterm | |
| GO:0036289 | 0.999708 |
| GO:0060440 | 0.994743 |
| GO:0042149 | 0.971292 |
| GO:1902455 | 0.969545 |
| GO:0001556 | 0.965979 |
print("There are " +str(len(GO_terms_auc_svm_df))+ " svm models.")There are 939 svm models.
# Restrict the result matrices to the GO terms for which a model was trained
# (the index of GO_terms_auc_svm_df).
_modelled_terms = list(GO_terms_auc_svm_df.index)
platt_matrix = platt_matrix.loc[_modelled_terms, :]
distance_matrix = distance_matrix.loc[_modelled_terms, :]
slim_matrix_single_neuron = slim_matrix_single_neuron.loc[_modelled_terms, :]
preds_svm_matrix = preds_svm_matrix.loc[_modelled_terms, :]
delta_logits_matrix = delta_logits_matrix.loc[list(GO_terms_auc_svm_df.index),:]AUC histogram
# Histogram of the cross-validated AUC of every GO-term model. Bins lying
# entirely at or above the threshold are highlighted and the share of models
# reaching the threshold is printed on the figure.
# A single threshold is used for the percentage, the bin colouring and the
# legend text ("AUC>=0.7") so the three cannot disagree.
_AUC_THRESHOLD = 0.7
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()

# Select the "auc" column explicitly: the frame gains extra columns later in
# the notebook, and passing the whole frame would break this cell on re-run.
_auc_values = GO_terms_auc_svm_df["auc"]
_n_good = int((_auc_values >= _AUC_THRESHOLD).sum())
perc = str(round(100*_n_good/len(_auc_values), 2))+"%"

N, bins, patches = plt.hist(_auc_values, color=CB_color_cycle[6],bins=50, linewidth=0.1)
for _left_edge, _patch in zip(bins[:-1], patches):
    if _left_edge >= _AUC_THRESHOLD:
        _patch.set_facecolor(CB_color_cycle[2])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide the top, right and left spines; draw the bottom one in light gray.
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
# Remove the x ticks but keep the y ticks.
ax.tick_params(bottom=False, left=True)
# Add a horizontal light-gray grid drawn below the bars; no vertical grid.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of GO term models", fontsize=20)

colors2 = {'GO term models with AUC>=0.7':CB_color_cycle[2]}
labels = list(colors2.keys())
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.71, 8, perc, fontsize=20,color='#333333')
plt.title("Overall performance of the models using expression", fontsize=24)
# Works best with the plain sum of the attributions.
fig.tight_layout()
fig.savefig(resultsdir+'modelsAUCsvm.png', transparent=True)
AUC waterfall plot
# Waterfall plot: one bar per GO-term model, sorted by decreasing AUC, with
# the share of models at or above the cut-off printed on the figure.
GO_terms_auc_svm_df = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False)
plt.rcParams['figure.figsize'] = (12, 9)

# NOTE: despite their names, `drugs` holds GO-term ids and `rhos` AUC values
# in this cell (the names are kept because they are module-level variables).
drugs = GO_terms_auc_svm_df.index
rhos = GO_terms_auc_svm_df["auc"]
# One mask drives both the percentage and the bar colours so they agree.
_above_cutoff = rhos >= 0.69
percentage = round((int(_above_cutoff.sum())/len(rhos))*100,1)

fig, ax = plt.subplots()
colors = ["#6492CA" if hit else '#C9C9C9' for hit in _above_cutoff]
# NOTE(review): only the bar edges are coloured; the bar faces keep the
# default colour. Confirm this is intended.
ax.bar(
    x=drugs,
    height=rhos,
    edgecolor=colors,
    linewidth=2
)
plt.xticks([])
plt.yticks(fontsize=28)

# Hide all four spines (figure borders), the ticks and both grids.
for _side in ('top', 'right', 'left', 'bottom'):
    ax.spines[_side].set_visible(False)
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(False)
ax.yaxis.grid(False)
ax.xaxis.grid(False)

# `labelpad` moves the axis labels relative to the tick labels.
ax.set_xlabel('SVM models', labelpad=-30, color='#333333',fontsize=50)
ax.set_ylabel('AUC-ROC value', labelpad=15, color='#333333',fontsize=50)

plt.text(77, 0.32, str(percentage)+"%", fontsize=60,color='#000000')
plt.ylim((-0.1,1.1))
# Make the chart fill out the figure better.
fig.tight_layout()
fig.savefig(resultsdir+'WaterfallModelsSVM.png', transparent=True)
AUC boxplot by parents
# Add, for every modelled GO term, its hierarchy level (level_number - 1) and
# the number of incoming edges in dG (presumably its parents - confirm the
# edge direction of dG).
number_parents = {}
levels = {}
for term in GO_terms_auc_svm_df.index:
    number_parents[term] = sum(1 for _source, _ in dG.in_edges(term))
    levels[term] = level_number[term]-1
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')
# Concatenate onto the "auc" column only, so that re-running this cell does
# not keep appending duplicate level/parent columns.
GO_terms_auc_svm_df = pd.concat([GO_terms_auc_svm_df[["auc"]], levels,number_parents], axis=1)
GO_terms_auc_svm_df.columns = ["auc","levels","parents"]GO_terms_auc_svm_df.head()| auc | levels | parents | |
|---|---|---|---|
| GO:0000077 | 0.284021 | 1 | 3 |
| GO:0045737 | 0.835954 | 0 | 8 |
| GO:0000082 | 0.732331 | 2 | 4 |
| GO:1900087 | 0.593301 | 0 | 10 |
| GO:2000134 | 0.865329 | 1 | 9 |
# Box plot of the cross-validated AUC grouped by GO hierarchy level, with a
# dotted reference line at AUC = 0.7.
import plotly.express as px
c = ['#E8384F', '#FD817D', '#FDAE33',
     '#EECC16', '#A4C61A', '#37A862',"#208EA3","#3B6EAB"]
# (The unused `df = px.data.tips()` sample-data load was removed: it was never
# read and overwrote any existing global named `df`.)
fig = px.box(GO_terms_auc_svm_df, x="levels", y="auc",
             color="levels",
             color_discrete_sequence=c,
             width =600,
             height=400,
             template="simple_white",
             labels=dict(levels="Level of GO hierarchy", auc="AUC-ROC")
             )
fig.update_traces(width=0.9)
fig.add_shape( # add a horizontal "target" line
    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=0.7, y1=0.7, yref="y"
)
fig.update_layout(
    # The bold tag is now closed properly (it was "<b> ... <b>").
    title=dict(text="<b> AUC value grouped by level of GO hierarchy </b>",
               x=0.5,
               y=0.9,
               font=dict(size=18),
               xanchor='center',
               yanchor='top'),
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=True, showgrid=True, zeroline=False),
    # yaxis_range=[min(yy.flatten()),max(yy.flatten())],
    # xaxis_range=[min(xx.flatten()),max(xx.flatten())],
    legend=dict(x=1.1, y=1, orientation="v",font=dict(size=11)),
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(family='Roboto',color= "#36382E",size=15)
)
fig.show()TOP 15 PREDICTED GO TERMS
top15goterms= np.array(GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False)[0:15].index)Get Top GO term names
# Look up the names of the 15 best-predicted GO terms. real_go_info is keyed
# by node id ("<GO term>_1"), so the suffix is added for the lookup and then
# stripped again.
top15goterms_1 = [goterm+"_"+str(1) for goterm in top15goterms]
# .copy() makes the filtered rows an independent frame, so the assignment
# below does not write into a slice of real_go_info.
real_go_info_mod_best = real_go_info[real_go_info.GO_term.isin(top15goterms_1)].copy()
# Strip only the trailing "_1"; regex is passed explicitly because the default
# of `regex` differs between pandas versions.
real_go_info_mod_best["GO_term"] = real_go_info_mod_best["GO_term"].str.replace(r"_1$", "", regex=True)
top15goterms_auc = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False)[0:15].reset_index()
top15goterms_auc.columns=["GO_term","auc","levels","parents"]top15goterms_auc = top15goterms_auc.merge(real_go_info_mod_best[real_go_info_mod_best["GO_term"].isin(top15goterms)], on="GO_term")top15goterms_auc| GO_term | auc | levels | parents | Name | layer_number | |
|---|---|---|---|---|---|---|
| 0 | GO:0036289 | 0.999708 | 0 | 2 | Peptidyl-serine autophosphorylation (1) | 0.0 |
| 1 | GO:0060440 | 0.994743 | 0 | 4 | Trachea formation (1) | 0.0 |
| 2 | GO:0042149 | 0.971292 | 0 | 1 | Cellular response to glucose starvation (1) | 0.0 |
| 3 | GO:1902455 | 0.969545 | 0 | 2 | Negative regulation of stem cell population maintenance (1) | 0.0 |
| 4 | GO:0001556 | 0.965979 | 0 | 6 | Oocyte maturation (1) | 0.0 |
| 5 | GO:0045636 | 0.955115 | 0 | 6 | Positive regulation of melanocyte differentiation (1) | 0.0 |
| 6 | GO:0010750 | 0.955000 | 0 | 4 | Positive regulation of nitric oxide mediated signal transduction (1) | 0.0 |
| 7 | GO:0060020 | 0.949434 | 0 | 1 | Bergmann glial cell differentiation (1) | 0.0 |
| 8 | GO:1902042 | 0.945804 | 0 | 4 | Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) | 0.0 |
| 9 | GO:1902236 | 0.941667 | 0 | 12 | Negative regulation of endoplasmic reticulum stress-induced intrinsic apoptotic signaling pathway (1) | 0.0 |
| 10 | GO:0070059 | 0.936432 | 1 | 2 | Intrinsic apoptotic signaling pathway in response to endoplasmic reticulum stress (1) | 1.0 |
| 11 | GO:0051453 | 0.935521 | 1 | 2 | Regulation of intracellular ph (1) | 1.0 |
| 12 | GO:0042659 | 0.931364 | 0 | 3 | Regulation of cell fate specification (1) | 0.0 |
| 13 | GO:0006360 | 0.930046 | 2 | 7 | Transcription by rna polymerase i (1) | 2.0 |
| 14 | GO:0006959 | 0.921730 | 2 | 2 | Humoral immune response (1) | 2.0 |
WORST 15 PREDICTED GO TERMS
worst15goterms= np.array(GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=True)[0:15].index)Get Worst GO term names
# Look up the names of the 15 worst-predicted GO terms. real_go_info is keyed
# by node id ("<GO term>_1"), so the suffix is added for the lookup and then
# stripped again.
worst15goterms_1 = [goterm+"_"+str(1) for goterm in worst15goterms]
# .copy() makes the filtered rows an independent frame, so the assignment
# below does not write into a slice of real_go_info.
real_go_info_mod_worst = real_go_info[real_go_info.GO_term.isin(worst15goterms_1)].copy()
# Strip only the trailing "_1"; regex is passed explicitly because the default
# of `regex` differs between pandas versions.
real_go_info_mod_worst["GO_term"] = real_go_info_mod_worst["GO_term"].str.replace(r"_1$", "", regex=True)
worst15goterms_auc = GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=True)[0:15].reset_index()
worst15goterms_auc.columns=["GO_term","auc","levels","parents"]worst15goterms_auc.merge(real_go_info_mod_worst[real_go_info_mod_worst["GO_term"].isin(worst15goterms)], on="GO_term")| GO_term | auc | levels | parents | Name | layer_number | |
|---|---|---|---|---|---|---|
| 0 | GO:0000077 | 0.284021 | 1 | 3 | Dna damage checkpoint signaling (1) | 1.0 |
| 1 | GO:0006869 | 0.299648 | 3 | 2 | Lipid transport (1) | 3.0 |
| 2 | GO:0051302 | 0.314545 | 1 | 2 | Regulation of cell division (1) | 1.0 |
| 3 | GO:0016485 | 0.318636 | 3 | 5 | Protein processing (1) | 3.0 |
| 4 | GO:0019722 | 0.322272 | 2 | 1 | Calcium-mediated signaling (1) | 2.0 |
| 5 | GO:0046854 | 0.326276 | 1 | 2 | Phosphatidylinositol phosphate biosynthetic process (1) | 1.0 |
| 6 | GO:0060740 | 0.331825 | 1 | 6 | Prostate gland epithelium morphogenesis (1) | 1.0 |
| 7 | GO:0060444 | 0.347273 | 1 | 8 | Branching involved in mammary gland duct morphogenesis (1) | 1.0 |
| 8 | GO:0006919 | 0.352725 | 1 | 3 | Activation of cysteine-type endopeptidase activity involved in apoptotic process (1) | 1.0 |
| 9 | GO:0032436 | 0.353421 | 1 | 14 | Positive regulation of proteasomal ubiquitin-dependent protein catabolic process (1) | 1.0 |
| 10 | GO:0055119 | 0.353947 | 1 | 1 | Relaxation of cardiac muscle (1) | 1.0 |
| 11 | GO:0001892 | 0.355979 | 1 | 5 | Embryonic placenta development (1) | 1.0 |
| 12 | GO:0031295 | 0.364518 | 0 | 8 | T cell costimulation (1) | 0.0 |
| 13 | GO:0046620 | 0.365476 | 1 | 3 | Regulation of organ growth (1) | 1.0 |
| 14 | GO:0008361 | 0.367423 | 2 | 1 | Regulation of cell size (1) | 2.0 |
AUPR histogram
# Turn the {GO term: AUPR} dictionary into a one-column frame indexed by GO
# term, then drop terms whose AUPR is NaN.
GO_terms_aupr_svm_df = pd.DataFrame(list(GO_terms_aupr_svm.items()),columns = ['goterm','aupr']).set_index("goterm")
GO_terms_aupr_svm_df = GO_terms_aupr_svm_df.dropna()
GO_terms_aupr_svm_df.sort_values(by=["aupr"], ascending=False).head()| aupr | |
|---|---|
| goterm | |
| GO:0036289 | 0.996209 |
| GO:0006807 | 0.945077 |
| GO:0050896 | 0.921869 |
| GO:0043170 | 0.909722 |
| GO:0009058 | 0.900903 |
# Add, for every modelled GO term, its hierarchy level (level_number - 1) and
# the number of incoming edges in dG (presumably its parents - confirm the
# edge direction of dG), then draw the AUPR box plot grouped by level.
number_parents = {}
levels = {}
for term in GO_terms_aupr_svm_df.index:
    number_parents[term] = sum(1 for _source, _ in dG.in_edges(term))
    levels[term] = level_number[term]-1
levels = pd.DataFrame.from_dict(levels, orient='index')
number_parents = pd.DataFrame.from_dict(number_parents, orient='index')
# Concatenate onto the "aupr" column only, so that re-running this cell does
# not keep appending duplicate level/parent columns.
GO_terms_aupr_svm_df = pd.concat([GO_terms_aupr_svm_df[["aupr"]], levels,number_parents], axis=1)
GO_terms_aupr_svm_df.columns = ["aupr","levels","parents"]

c = ['#E8384F', '#FD817D', '#FDAE33',
     '#EECC16', '#A4C61A', '#37A862',"#208EA3","#3B6EAB"]
# (The unused `df = px.data.tips()` sample-data load was removed: it was never
# read and overwrote any existing global named `df`.)
fig = px.box(GO_terms_aupr_svm_df, x="levels", y="aupr",
             color="levels",
             color_discrete_sequence=c,
             width =600,
             height=400,
             template="simple_white",
             labels=dict(levels="Level of GO hierarchy", aupr="AUPR")
             )
fig.update_traces(width=0.9)
fig.add_shape( # add a horizontal "target" line
    type="line", line_color="salmon", line_width=3, opacity=1, line_dash="dot",
    x0=0, x1=1, xref="paper", y0=0.7, y1=0.7, yref="y"
)
fig.update_layout(
    # The bold tag is now closed properly (it was "<b> ... <b>").
    title=dict(text="<b> AUPR value grouped by level of GO hierarchy </b>",
               x=0.5,
               y=0.9,
               font=dict(size=18),
               xanchor='center',
               yanchor='top'),
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=True, showgrid=True, zeroline=False),
    # yaxis_range=[min(yy.flatten()),max(yy.flatten())],
    # xaxis_range=[min(xx.flatten()),max(xx.flatten())],
    legend=dict(x=1.1, y=1, orientation="v",font=dict(size=11)),
    paper_bgcolor='rgba(0,0,0,0)',
    font=dict(family='Roboto',color= "#36382E",size=15)
)
fig.show()
pio.write_image(fig, resultsdir+"AUPR_levels.png", width=600, height=400,scale=8)Example prediction
def f2(goterm):
    """Return the GO term unchanged (identity callback for `interactive`)."""
    return goterm


# Combobox listing the modelled GO terms, best cross-validated AUC first.
_go_options = list(GO_terms_auc_svm_df.sort_values(by=["auc"], ascending=False).index)
combobox_go = interactive(f2, goterm=widgets.Combobox(options=_go_options))
# Choose the GO term to study…
display(combobox_go)
selected_go = combobox_go.result
#auc
plt.rcParams['figure.figsize'] = (4, 2)
# Row of annotations for the selected GO term (the ground truth for both plots)
_go_truth = slim_matrix_single_neuron.loc[selected_go]

# ROC curve and box plot using the Platt probabilities
_go_platt = platt_matrix.loc[selected_go]
fpr, tpr, _ = metrics.roc_curve(_go_truth, _go_platt)
auc = metrics.roc_auc_score(_go_truth, _go_platt)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()
plot = pd.concat([pd.DataFrame(_go_truth), pd.DataFrame(_go_platt)], axis=1)
plot.columns = ["slim","probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot, showfliers=False)

# Same two plots using the delta logits as the score. The column is still
# labelled "probability" although it holds delta logits.
_go_delta = delta_logits_matrix.loc[selected_go]
fpr, tpr, _ = metrics.roc_curve(_go_truth, _go_delta)
auc = metrics.roc_auc_score(_go_truth, _go_delta)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()
plot = pd.concat([pd.DataFrame(_go_truth), pd.DataFrame(_go_delta)], axis=1)
plot.columns = ["slim","probability"]
ax = sns.boxplot(x="slim", y="probability", data=plot, showfliers=False)

plt.rcParams['figure.figsize'] = (2, 2)
# Ground truth and hard SVM predictions for the selected GO term
_cm_truth = slim_matrix_single_neuron.loc[selected_go]
_cm_pred = preds_svm_matrix.loc[selected_go]
metrics.ConfusionMatrixDisplay.from_predictions(_cm_truth, _cm_pred)
plt.grid(visible=None)
# Recall is TP / (TP+FN)
for _caption, _scorer in (("Accuracy:", metrics.accuracy_score),
                          ("Precision:", metrics.precision_score),
                          ("Recall:", metrics.recall_score)):
    print(_caption, _scorer(_cm_truth, _cm_pred))
print("AUC with score:",auc) #TP / (TP+FN)Accuracy: 0.9782608695652174
Precision: 0.7894736842105263
Recall: 0.9375
AUC with score: 0.9947429906542057

Confusion matrix layout (rows: true label, columns: predicted label):
TN | FP
FN | TP
# Precision-recall curve of the selected GO term.
plt.rcParams['figure.figsize'] = (4, 2)
# The curve is built from the continuous Platt probabilities, as is done for
# the per-GO-term AUPR values. Using the hard 0/1 predictions here would
# collapse the curve to a single operating point.
precision, recall, thresholds = metrics.precision_recall_curve(slim_matrix_single_neuron.loc[selected_go], platt_matrix.loc[selected_go])
auc_precision_recall = metrics.auc(recall, precision)
plt.plot(recall, precision,label=str(auc_precision_recall))
plt.legend(loc=4)
plt.show()
METRICS drugs
# Per-drug metrics: every column (drug) of the result matrices is scored
# across all modelled GO terms.
auc_drugs = {}
aupr_drugs = {}
precision_drugs = {}
for drug in slim_matrix_single_neuron.columns:
    drug_truth = slim_matrix_single_neuron.loc[:,drug]
    # Skip drugs without any annotation and, more generally, drugs whose
    # labels contain a single class: roc_auc_score is undefined for them and
    # raises.
    if drug_truth.sum() == 0 or drug_truth.nunique() < 2:
        continue
    drug_proba = platt_matrix.loc[:,drug]
    auc_drugs[drug] = metrics.roc_auc_score(drug_truth, drug_proba)
    precision, recall, thresholds = metrics.precision_recall_curve(drug_truth, drug_proba)
    aupr_drugs[drug] = metrics.auc(recall, precision)
    precision_drugs[drug] = metrics.precision_score(drug_truth, preds_svm_matrix.loc[:,drug])

# NOTE: the index column is labelled 'goterm' although it holds drug names;
# the label is kept unchanged in case other cells rely on it.
auc_drugs_df = pd.DataFrame(list(auc_drugs.items()),columns = ['goterm','auc']).set_index("goterm")
auc_drugs_df = auc_drugs_df.dropna()
aupr_drugs_df = pd.DataFrame(list(aupr_drugs.items()),columns = ['goterm','aupr']).set_index("goterm")
aupr_drugs_df = aupr_drugs_df.dropna()
precision_drugs_df = pd.DataFrame(list(precision_drugs.items()),columns = ['goterm','precision']).set_index("goterm")
precision_drugs_df = precision_drugs_df.dropna()AUC histogram drugs
# Histogram of the per-drug AUC; bins whose left edge lies above 0.7 are
# highlighted and the share of drugs with AUC > 0.7 is printed on the figure.
sns.set(rc={'figure.figsize':(10,6)})
fig, ax = plt.subplots()

_n_good_drugs = len(auc_drugs_df[auc_drugs_df["auc"]>0.7])
perc = f"{round(100*_n_good_drugs/len(auc_drugs_df), 2)}%"

N, bins, patches = plt.hist(auc_drugs_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)
for _left_edge, _patch in zip(bins[:-1], patches):
    if _left_edge>0.7:
        _patch.set_facecolor(CB_color_cycle[5])

plt.yticks(fontsize=16)
plt.xticks(fontsize=16)

# Hide the top, right and left spines; draw the bottom one in light gray.
for _side in ('top', 'right', 'left'):
    ax.spines[_side].set_visible(False)
ax.spines['bottom'].set_color('#DDDDDD')
# Remove the x ticks but keep the y ticks.
ax.tick_params(bottom=False, left=True)
# Horizontal light-gray grid drawn below the bars; no vertical grid.
ax.set_axisbelow(True)
ax.yaxis.grid(True, color='#EEEEEE')
ax.xaxis.grid(False)

plt.xlabel("AUC value", fontsize=20)
plt.ylabel("Number of drugs", fontsize=20)

colors2 = {'Drugs with AUC>=0.7':CB_color_cycle[5]}
labels = list(colors2)
handles = [plt.Rectangle((0,0),1,1, color=colors2[label]) for label in labels]
plt.legend(handles, labels,fontsize=20, loc="lower left", bbox_to_anchor=(0.35,-0.35))
plt.text(0.79, 6, str(perc), fontsize=20,color='#333333')
plt.title("Overall performance by drugs using mutations", fontsize=24)
# Works best with the plain sum of the attributions.
fig.tight_layout()
fig.savefig(resultsdir+'drugsAUC.png', transparent=True)
AUC waterfall plot drugs
# Waterfall plot: one bar per drug, sorted by decreasing AUC, with the share
# of drugs at or above the cut-off printed on the figure.
auc_drugs_df = auc_drugs_df.sort_values(by=["auc"], ascending=False)
plt.rcParams['figure.figsize'] = (12, 9)

drugs = auc_drugs_df.index
rhos = auc_drugs_df["auc"]  # AUC values (the name is kept: module-level variable)
# One mask drives both the percentage and the bar colours so they agree.
_above_cutoff = rhos >= 0.69
percentage = round((int(_above_cutoff.sum())/len(rhos))*100,1)

fig, ax = plt.subplots()
colors = ["#B678BE" if hit else '#C9C9C9' for hit in _above_cutoff]
# NOTE(review): only the bar edges are coloured; the bar faces keep the
# default colour. Confirm this is intended.
ax.bar(
    x=drugs,
    height=rhos,
    edgecolor=colors,
    linewidth=3
)
plt.xticks([])
plt.yticks(fontsize=28)

# Hide all four spines (figure borders), the ticks and both grids.
for _side in ('top', 'right', 'left', 'bottom'):
    ax.spines[_side].set_visible(False)
ax.tick_params(bottom=False, left=False)
ax.set_axisbelow(False)
ax.yaxis.grid(False)
ax.xaxis.grid(False)

# `labelpad` moves the axis labels relative to the tick labels.
ax.set_xlabel('Drugs', labelpad=-30, color='#333333',fontsize=50)
ax.set_ylabel('AUC-ROC value', labelpad=15, color='#333333',fontsize=50)

plt.text(77, 0.32, str(percentage)+"%", fontsize=60,color='#000000')
plt.ylim((-0.1,1.1))
# Make the chart fill out the figure better.
fig.tight_layout()
fig.savefig(resultsdir+'WaterfallModelsSVM_drugs.png', transparent=True)
AUPR histogram drugs
# Histogram of the per-drug AUPR; bins whose left edge lies above 0.69 are
# highlighted. `perc` is the share of drugs with AUPR > 0.69.
sns.set(rc={'figure.figsize':(5,3)})
_n_good_aupr = len(aupr_drugs_df[aupr_drugs_df["aupr"]>0.69])
perc = f"{round(100*_n_good_aupr/len(aupr_drugs_df), 2)}%"
N, bins, patches = plt.hist(aupr_drugs_df, color=CB_color_cycle[6],bins=50, linewidth=0.1)
for _left_edge, _patch in zip(bins[:-1], patches):
    if _left_edge>0.69:
        _patch.set_facecolor(CB_color_cycle[3])
plt.xlabel("AUPR drugs", fontsize=16)
plt.title(perc, fontsize=16)Text(0.5, 1.0, '33.62%')

Example drug prediction
def f(drug):
    """Return the drug name unchanged (identity callback for `interactive`)."""
    return drug


# Node ids ("<GO term>_1") of every GO term that has a model
predictions_nodes = [goterm+"_"+str(1) for goterm in platt_matrix.index]
# add names to go terms
# .copy() makes the filtered rows an independent frame, so the assignment
# below does not write into a slice of real_go_info.
real_go_info_svm = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# Strip only the trailing "_1"; regex is passed explicitly because the default
# of `regex` differs between pandas versions.
real_go_info_svm["GO_term"] = real_go_info_svm["GO_term"].str.replace(r"_1$", "", regex=True)

# Combobox listing the drugs, highest precision first.
combobox = interactive(f, drug=widgets.Combobox(options=list(precision_drugs_df.sort_values(by=["precision"], ascending=False).index)))
# Choose drug to study…
display(combobox)
selected_drug_name = combobox.result
sns.set(rc={'figure.figsize':(4,2)})
# ROC curve of the selected drug across all modelled GO terms (Platt scores)
_drug_truth = slim_matrix_single_neuron.loc[:, selected_drug_name]
_drug_platt = platt_matrix.loc[:, selected_drug_name]
fpr, tpr, _ = metrics.roc_curve(_drug_truth, _drug_platt)
auc = metrics.roc_auc_score(_drug_truth, _drug_platt)
plt.plot(fpr, tpr, label="data 1, auc=" + str(auc))
plt.legend(loc=4)
plt.show()

# SVM score split by annotation status
plot = pd.concat([pd.DataFrame(_drug_truth), pd.DataFrame(_drug_platt)], axis=1)
plot.columns = ["slim","svm score"]
ax = sns.boxplot(x="slim", y="svm score", data=plot, showfliers=False)

# Raw attribution (multiplied by 1e4) split by annotation status; this uses
# the full slim_matrix / attribution table rather than the per-term matrices.
_drug_attribution = attribution_data_annotated.loc[:, selected_drug_name]*1e4
plot = pd.concat([pd.DataFrame(slim_matrix.loc[:, selected_drug_name]), pd.DataFrame(_drug_attribution)], axis=1)
plot.columns = ["slim","attribution"]
ax = sns.boxplot(x="slim", y="attribution", data=plot, showfliers=True)
metrics.ConfusionMatrixDisplay.from_predictions(slim_matrix_single_neuron.loc[:,selected_drug_name].round(), preds_svm_matrix.loc[:,selected_drug_name])<sklearn.metrics._plot.confusion_matrix.ConfusionMatrixDisplay at 0x2502863c8b0>

# Accuracy, precision and recall (TP / (TP+FN)) of the hard SVM predictions
# for the selected drug.
_sd_truth = slim_matrix_single_neuron.loc[:, selected_drug_name]
_sd_pred = preds_svm_matrix.loc[:, selected_drug_name]
for _caption, _scorer in (("Accuracy:", metrics.accuracy_score),
                          ("Precision:", metrics.precision_score),
                          ("Recall:", metrics.recall_score)):
    print(_caption, _scorer(_sd_truth, _sd_pred))
print("AUC with score:",auc) Accuracy: 0.663471778487753
Precision: 0.8651026392961877
Recall: 0.5221238938053098
AUC with score: 0.9947429906542057
# These are the TEST logits: delta_logits_matrix is filled with the
# out-of-fold predictions of the cross-validation, despite the "train_" name
# of the variable below.
# Build a table of the selected drug's delta logits per GO term and attach the
# GO-term information from real_go_info_svm.
train_drug_logs = pd.DataFrame(delta_logits_matrix.loc[:,selected_drug_name]).reset_index()
# The score column is labelled "probability" although it holds delta logits.
train_drug_logs.columns = ["GO_term","probability"]
train_drug_logs = train_drug_logs.merge(real_go_info_svm, on="GO_term")
train_drug_logs.sort_values(by=["probability"], ascending=False)Final model SVM
Once the models have been cross-validated we create the final models using all samples…
# Final models: one SVM per GO term fitted on ALL drugs (no hold-out).
# The metrics stored here are computed on the same samples the model was
# trained on, so they are training-set scores, not generalisation estimates.
GO_terms_auc_svm_final = {}
GO_terms_aupr_svm_final = {}
GO_terms_precision_svm_final = {}
models_svm = {}
for goterm in sparseGO_terms:
    #print(goterm)
    goterm_drugs = slim_matrix.loc[[goterm+"_"+str(1)]].values.flatten()
    # Same eligibility rule as in the cross-validation: more than 8
    # annotated drugs are required.
    if sum(goterm_drugs) <= 8:
        continue

    # Features: rows "<goterm>_1" ... "<goterm>_6", scaled by their std
    list_nodes = [goterm+"_"+str(i) for i in range(1,7)]
    score = attribution_data_annotated.loc[list_nodes].T
    score_mod = score.divide(score.std()).fillna(0)

    # Train and evaluate on the full data set
    X_train = X_test = score_mod
    y_train = y_test = goterm_drugs

    #gamma = 1/(X_train.shape[1]*X_train.to_numpy().var())
    gamma="scale"
    C=1
    svm_model = svm.SVC(
        C=C,
        gamma=gamma,
        kernel='rbf',
        class_weight="balanced",
        tol=0.001,
        probability=True,
        random_state=1234,
    )
    # fit the model with data
    svm_model.fit(X_train,y_train)
    y_pred=svm_model.predict(X_test)

    # Platt-scaled probability of class 1
    y_pred_proba = svm_model.predict_proba(X_test)[::,1]
    #y_pred_proba = svm_model.decision_function(X_test)
    GO_terms_auc_svm_final[goterm] = metrics.roc_auc_score(y_test, y_pred_proba)
    precision, recall, thresholds = metrics.precision_recall_curve(y_test, y_pred_proba)
    GO_terms_aupr_svm_final[goterm] = metrics.auc(recall, precision)
    GO_terms_precision_svm_final[goterm] = metrics.precision_score(y_test, y_pred)

    models_svm[goterm]=svm_model

len(models_svm)  # notebook output: 939
Final model AUC
# Turn the {GO term: AUC} dictionary of the final models into a one-column
# frame indexed by GO term, then drop terms whose AUC is NaN.
# These AUCs were computed on the training samples themselves.
GO_terms_auc_svm_df_final = pd.DataFrame(list(GO_terms_auc_svm_final.items()),columns = ['goterm','auc']).set_index("goterm")
GO_terms_auc_svm_df_final = GO_terms_auc_svm_df_final.dropna()
GO_terms_auc_svm_df_final.sort_values(by=["auc"], ascending=False)| auc | |
|---|---|
| goterm | |
| GO:0036289 | 1.000000 |
| GO:0060440 | 0.998540 |
| GO:0043162 | 0.995455 |
| GO:0070059 | 0.994760 |
| GO:0071364 | 0.994109 |
| GO:1901029 | 0.994048 |
| GO:0072384 | 0.993636 |
| GO:0051453 | 0.993393 |
| GO:0001556 | 0.991972 |
| GO:0090201 | 0.991808 |
| GO:0010750 | 0.990909 |
| GO:0016573 | 0.990783 |
| GO:1903800 | 0.990573 |
| GO:1904950 | 0.989945 |
| GO:1902455 | 0.989091 |
| GO:0042149 | 0.987697 |
| GO:0034983 | 0.987273 |
| GO:1990403 | 0.985909 |
| GO:0071353 | 0.985587 |
| GO:0006275 | 0.984226 |
| GO:0010971 | 0.984091 |
| GO:0006869 | 0.983409 |
| GO:0001779 | 0.983182 |
| GO:0051973 | 0.981651 |
| GO:0060749 | 0.980895 |
| GO:0042771 | 0.980633 |
| GO:0072655 | 0.980455 |
| GO:0061734 | 0.980455 |
| GO:0045636 | 0.980178 |
| GO:0045737 | 0.980084 |
| GO:1902236 | 0.979762 |
| GO:0060632 | 0.979545 |
| GO:0016575 | 0.978731 |
| GO:0042659 | 0.977727 |
| GO:0046628 | 0.977376 |
| GO:1902042 | 0.977273 |
| GO:0098780 | 0.975909 |
| GO:0046902 | 0.975849 |
| GO:0051607 | 0.975552 |
| GO:0006401 | 0.974678 |
| GO:0017157 | 0.974040 |
| GO:0032740 | 0.973856 |
| GO:0006270 | 0.973848 |
| GO:0046666 | 0.973570 |
| GO:0008045 | 0.972603 |
| GO:0006303 | 0.972553 |
| GO:0042177 | 0.972431 |
| GO:0060020 | 0.972290 |
| GO:0006360 | 0.972095 |
| GO:2001021 | 0.971520 |
| GO:0042733 | 0.971364 |
| GO:0016572 | 0.971342 |
| GO:0070932 | 0.970909 |
| GO:2001257 | 0.970909 |
| GO:0001782 | 0.970384 |
| GO:0006261 | 0.970112 |
| GO:1905564 | 0.969834 |
| GO:2000757 | 0.969545 |
| GO:0051354 | 0.969091 |
| GO:0072284 | 0.969069 |
| GO:0051926 | 0.968891 |
| GO:0043407 | 0.968585 |
| GO:0034394 | 0.968096 |
| GO:0050870 | 0.967621 |
| GO:0046898 | 0.967143 |
| GO:0031047 | 0.967115 |
| GO:0016925 | 0.966364 |
| GO:0035790 | 0.966361 |
| GO:0006417 | 0.965261 |
| GO:0032469 | 0.965008 |
| GO:0035195 | 0.964816 |
| GO:0021782 | 0.964091 |
| GO:0070584 | 0.963810 |
| GO:0051384 | 0.961083 |
| GO:0002326 | 0.960811 |
| GO:2000773 | 0.960310 |
| GO:0050729 | 0.959779 |
| GO:0046942 | 0.959480 |
| GO:0035249 | 0.959091 |
| GO:0045821 | 0.958904 |
| GO:0099111 | 0.958880 |
| GO:0071670 | 0.958851 |
| GO:0006367 | 0.958333 |
| GO:1905278 | 0.958270 |
| GO:0010559 | 0.957929 |
| GO:0006959 | 0.957854 |
| GO:0018205 | 0.957782 |
| GO:0035860 | 0.957768 |
| GO:0031640 | 0.957381 |
| GO:0007059 | 0.957268 |
| GO:0070373 | 0.956762 |
| GO:0030282 | 0.956762 |
| GO:0001658 | 0.956522 |
| GO:0030890 | 0.956075 |
| GO:0035754 | 0.955757 |
| GO:0010832 | 0.955455 |
| GO:0099173 | 0.955238 |
| GO:0021695 | 0.955238 |
| GO:0045727 | 0.955026 |
| GO:0002862 | 0.954696 |
| GO:0014827 | 0.954432 |
| GO:0016579 | 0.953923 |
| GO:0002718 | 0.953854 |
| GO:0071320 | 0.953746 |
| GO:0051281 | 0.953182 |
| GO:0042552 | 0.953182 |
| GO:0000086 | 0.953095 |
| GO:0032147 | 0.952991 |
| GO:0032436 | 0.952499 |
| GO:0010592 | 0.952273 |
| GO:0006694 | 0.951735 |
| GO:0033141 | 0.951735 |
| GO:0071480 | 0.951429 |
| GO:0006612 | 0.951118 |
| GO:0048011 | 0.950729 |
| GO:1903077 | 0.950714 |
| GO:0033619 | 0.950455 |
| GO:0006352 | 0.950306 |
| GO:0001662 | 0.950221 |
| GO:0010039 | 0.950040 |
| GO:0090314 | 0.949147 |
| GO:0034502 | 0.949074 |
| GO:0014823 | 0.948954 |
| GO:2001240 | 0.948220 |
| GO:0007617 | 0.948182 |
| GO:0032743 | 0.947281 |
| GO:0006310 | 0.947141 |
| GO:0006605 | 0.946678 |
| GO:0006975 | 0.946204 |
| GO:2000739 | 0.946101 |
| GO:1902459 | 0.945909 |
| GO:0007626 | 0.945701 |
| GO:0023019 | 0.945116 |
| GO:0003376 | 0.944700 |
| GO:0006576 | 0.944346 |
| GO:0038007 | 0.943690 |
| GO:0050728 | 0.943637 |
| GO:0032922 | 0.942661 |
| GO:0045740 | 0.942465 |
| GO:1900118 | 0.942381 |
| GO:0010952 | 0.942143 |
| GO:1905710 | 0.942143 |
| GO:1902166 | 0.942128 |
| GO:0008637 | 0.941950 |
| GO:2000010 | 0.941865 |
| GO:0055118 | 0.941679 |
| GO:0000423 | 0.941364 |
| GO:0043154 | 0.941156 |
| GO:0048701 | 0.940775 |
| GO:0008210 | 0.940749 |
| GO:1900272 | 0.940171 |
| GO:0060997 | 0.939809 |
| GO:0007263 | 0.939545 |
| GO:2000379 | 0.939167 |
| GO:1900020 | 0.939091 |
| GO:0050896 | 0.938915 |
| GO:0016485 | 0.938636 |
| GO:0043966 | 0.938376 |
| GO:0002437 | 0.938295 |
| GO:2000300 | 0.937318 |
| GO:0140013 | 0.937095 |
| GO:0034767 | 0.936758 |
| GO:0031648 | 0.936624 |
| GO:0007026 | 0.936364 |
| GO:0032024 | 0.936149 |
| GO:0030193 | 0.936040 |
| GO:0010212 | 0.935098 |
| GO:0006457 | 0.934641 |
| GO:0032729 | 0.934420 |
| GO:0030593 | 0.934413 |
| GO:0010575 | 0.934272 |
| GO:0008064 | 0.933643 |
| GO:0008286 | 0.932331 |
| GO:0001818 | 0.932128 |
| GO:0030513 | 0.931404 |
| GO:0060766 | 0.931364 |
| GO:0006396 | 0.931346 |
| GO:0006919 | 0.931342 |
| GO:0038096 | 0.930886 |
| GO:0001553 | 0.930810 |
| GO:0045580 | 0.930407 |
| GO:0046326 | 0.930406 |
| GO:0035025 | 0.930294 |
| GO:1903146 | 0.929091 |
| GO:0060444 | 0.929091 |
| GO:0006412 | 0.928571 |
| GO:0048536 | 0.928290 |
| GO:0002819 | 0.927685 |
| GO:0048704 | 0.927370 |
| GO:0051054 | 0.927333 |
| GO:0090184 | 0.927099 |
| GO:1900006 | 0.926941 |
| GO:2000134 | 0.926917 |
| GO:0046889 | 0.926822 |
| GO:0043123 | 0.926512 |
| GO:0070842 | 0.926364 |
| GO:0046329 | 0.926364 |
| GO:0006898 | 0.925891 |
| GO:0006368 | 0.925841 |
| GO:1905897 | 0.925743 |
| GO:0030048 | 0.925591 |
| GO:0042180 | 0.925076 |
| GO:0035909 | 0.924883 |
| GO:0051209 | 0.924065 |
| GO:0030308 | 0.923951 |
| GO:0043170 | 0.923707 |
| GO:0035726 | 0.922783 |
| GO:0031663 | 0.922727 |
| GO:0000209 | 0.922119 |
| GO:0009165 | 0.921544 |
| GO:0002720 | 0.921427 |
| GO:0006096 | 0.921292 |
| GO:1902036 | 0.921254 |
| GO:0071549 | 0.921066 |
| GO:0007528 | 0.920950 |
| GO:0090090 | 0.920930 |
| GO:0042472 | 0.920455 |
| GO:0031056 | 0.920429 |
| GO:0050864 | 0.920262 |
| GO:0060789 | 0.920000 |
| GO:0007389 | 0.919762 |
| GO:0048743 | 0.919572 |
| GO:0030705 | 0.919116 |
| GO:0060179 | 0.919091 |
| GO:0045739 | 0.918823 |
| GO:0043627 | 0.917977 |
| GO:0040018 | 0.917659 |
| GO:2001243 | 0.917078 |
| GO:0090037 | 0.917056 |
| GO:0040016 | 0.915987 |
| GO:0043552 | 0.915951 |
| GO:0001666 | 0.915013 |
| GO:0010508 | 0.914755 |
| GO:0033690 | 0.914545 |
| GO:0098586 | 0.914419 |
| GO:0043922 | 0.914091 |
| GO:0035994 | 0.914021 |
| GO:0031398 | 0.913694 |
| GO:0042093 | 0.913524 |
| GO:0032410 | 0.913182 |
| GO:1901224 | 0.913182 |
| GO:0006839 | 0.913167 |
| GO:0045907 | 0.912844 |
| GO:2000278 | 0.912619 |
| GO:2001236 | 0.912563 |
| GO:0048170 | 0.912474 |
| GO:0071839 | 0.912217 |
| GO:0031507 | 0.911552 |
| GO:0060391 | 0.911011 |
| GO:0032148 | 0.910451 |
| GO:0070102 | 0.910000 |
| GO:0030878 | 0.909762 |
| GO:0035162 | 0.909463 |
| GO:0051225 | 0.909314 |
| GO:0002931 | 0.909064 |
| GO:0007411 | 0.908683 |
| GO:0008625 | 0.908500 |
| GO:0035788 | 0.908313 |
| GO:0010921 | 0.907360 |
| GO:0048266 | 0.906977 |
| GO:0010977 | 0.906667 |
| GO:0050910 | 0.906656 |
| GO:0045732 | 0.906062 |
| GO:0046620 | 0.905714 |
| GO:0035855 | 0.905551 |
| GO:0030316 | 0.905551 |
| GO:0006469 | 0.905340 |
| GO:0090263 | 0.905136 |
| GO:0021953 | 0.904874 |
| GO:0060312 | 0.904790 |
| GO:0006260 | 0.904703 |
| GO:0030521 | 0.904434 |
| GO:0008016 | 0.904091 |
| GO:0010727 | 0.904091 |
| GO:0030509 | 0.904035 |
| GO:0007498 | 0.903914 |
| GO:0050769 | 0.903592 |
| GO:0050792 | 0.903414 |
| GO:0009582 | 0.903167 |
| GO:0007098 | 0.902745 |
| GO:0002821 | 0.902464 |
| GO:0071276 | 0.902162 |
| GO:0007286 | 0.901132 |
| GO:0045088 | 0.900952 |
| GO:0055003 | 0.900943 |
| GO:0035767 | 0.900748 |
| GO:0045987 | 0.900474 |
| GO:0061029 | 0.900474 |
| GO:0033327 | 0.900465 |
| GO:0000422 | 0.900374 |
| GO:0010976 | 0.900117 |
| GO:0008354 | 0.899895 |
| GO:0070528 | 0.899726 |
| GO:0006807 | 0.899601 |
| GO:0045833 | 0.899128 |
| GO:1905065 | 0.898923 |
| GO:0007018 | 0.898915 |
| GO:0007422 | 0.898647 |
| GO:0048484 | 0.898636 |
| GO:0032467 | 0.898182 |
| GO:0050795 | 0.897909 |
| GO:0030539 | 0.897909 |
| GO:0048538 | 0.897833 |
| GO:0032355 | 0.897646 |
| GO:0007416 | 0.897554 |
| GO:0021575 | 0.897509 |
| GO:0060348 | 0.897410 |
| GO:0001569 | 0.897282 |
| GO:0060384 | 0.897171 |
| GO:0031069 | 0.897099 |
| GO:0050918 | 0.897059 |
| GO:0035584 | 0.896905 |
| GO:0051046 | 0.896369 |
| GO:0043129 | 0.896233 |
| GO:0001843 | 0.896024 |
| GO:0046330 | 0.895444 |
| GO:0007030 | 0.895429 |
| GO:0048873 | 0.895092 |
| GO:0000724 | 0.894922 |
| GO:0007202 | 0.894511 |
| GO:1903053 | 0.894419 |
| GO:0003338 | 0.894238 |
| GO:1901990 | 0.894150 |
| GO:0060644 | 0.893917 |
| GO:0043161 | 0.893782 |
| GO:0030838 | 0.892727 |
| GO:0001946 | 0.892571 |
| GO:0072210 | 0.892039 |
| GO:0030101 | 0.892003 |
| GO:0050731 | 0.892003 |
| GO:0010613 | 0.891865 |
| GO:0030325 | 0.891865 |
| GO:0048714 | 0.891783 |
| GO:0048008 | 0.891667 |
| GO:0001823 | 0.890989 |
| GO:0016239 | 0.890496 |
| GO:0030216 | 0.890460 |
| GO:0071300 | 0.890341 |
| GO:0032008 | 0.889952 |
| GO:0061045 | 0.889881 |
| GO:0051894 | 0.889619 |
| GO:0030010 | 0.889612 |
| GO:0031016 | 0.889533 |
| GO:0001942 | 0.889526 |
| GO:1902533 | 0.889155 |
| GO:0016358 | 0.888660 |
| GO:0001501 | 0.888280 |
| GO:0051092 | 0.888251 |
| GO:0016601 | 0.887883 |
| GO:0097067 | 0.887324 |
| GO:0009306 | 0.887019 |
| GO:0048167 | 0.886555 |
| GO:0050921 | 0.886315 |
| GO:1990384 | 0.886268 |
| GO:0046883 | 0.886202 |
| GO:0007519 | 0.886154 |
| GO:0043270 | 0.885881 |
| GO:0003007 | 0.885720 |
| GO:0071900 | 0.885420 |
| GO:0007585 | 0.885391 |
| GO:2001214 | 0.885258 |
| GO:0071456 | 0.884685 |
| GO:0016567 | 0.884594 |
| GO:0060740 | 0.882856 |
| GO:0035094 | 0.882732 |
| GO:0072073 | 0.882732 |
| GO:0060612 | 0.881602 |
| GO:0060325 | 0.881498 |
| GO:0045668 | 0.881347 |
| GO:0042531 | 0.881332 |
| GO:0010038 | 0.881167 |
| GO:0071333 | 0.880972 |
| GO:0006939 | 0.880907 |
| GO:0090141 | 0.880907 |
| GO:0046718 | 0.880697 |
| GO:0051770 | 0.880461 |
| GO:0033627 | 0.880455 |
| GO:0048149 | 0.880352 |
| GO:0002685 | 0.880291 |
| GO:0043029 | 0.880195 |
| GO:0038033 | 0.879699 |
| GO:0055119 | 0.879336 |
| GO:0003300 | 0.878843 |
| GO:0005984 | 0.878788 |
| GO:0002218 | 0.878773 |
| GO:0072239 | 0.878669 |
| GO:0031103 | 0.878667 |
| GO:0048557 | 0.878638 |
| GO:1901987 | 0.878627 |
| GO:0060048 | 0.877703 |
| GO:0045637 | 0.877659 |
| GO:2001234 | 0.877406 |
| GO:0038083 | 0.876762 |
| GO:0071277 | 0.876323 |
| GO:0048839 | 0.876278 |
| GO:0000723 | 0.875714 |
| GO:0060627 | 0.875648 |
| GO:0035022 | 0.874811 |
| GO:0007435 | 0.874669 |
| GO:2001241 | 0.874309 |
| GO:0002062 | 0.874091 |
| GO:0035234 | 0.873792 |
| GO:0034976 | 0.873754 |
| GO:0007584 | 0.872411 |
| GO:0002318 | 0.872408 |
| GO:0001975 | 0.872354 |
| GO:0071230 | 0.871837 |
| GO:0034446 | 0.871788 |
| GO:0070933 | 0.871364 |
| GO:0030072 | 0.871331 |
| GO:0071897 | 0.871171 |
| GO:0035733 | 0.870478 |
| GO:0032967 | 0.870403 |
| GO:0048675 | 0.870071 |
| GO:0060571 | 0.870035 |
| GO:0050920 | 0.869917 |
| GO:0050678 | 0.869106 |
| GO:0034405 | 0.869048 |
| GO:0051150 | 0.868932 |
| GO:0001934 | 0.868720 |
| GO:0010507 | 0.868700 |
| GO:1904707 | 0.868636 |
| GO:0050821 | 0.868325 |
| GO:0006811 | 0.868262 |
| GO:0070588 | 0.868155 |
| GO:0014911 | 0.867596 |
| GO:0090280 | 0.867440 |
| GO:0008630 | 0.867386 |
| GO:1901796 | 0.867386 |
| GO:0051056 | 0.867368 |
| GO:0051321 | 0.865996 |
| GO:0051051 | 0.865833 |
| GO:0051902 | 0.865573 |
| GO:0097009 | 0.865089 |
| GO:0060271 | 0.865061 |
| GO:0045930 | 0.864995 |
| GO:0035304 | 0.864977 |
| GO:0051899 | 0.864866 |
| GO:0033028 | 0.864808 |
| GO:0018108 | 0.864767 |
| GO:1900087 | 0.864434 |
| GO:0010467 | 0.863952 |
| GO:0035019 | 0.863557 |
| GO:0006687 | 0.863557 |
| GO:0001824 | 0.863532 |
| GO:0033689 | 0.863522 |
| GO:0071392 | 0.863443 |
| GO:0035264 | 0.863252 |
| GO:0046632 | 0.862800 |
| GO:0034605 | 0.862619 |
| GO:0032091 | 0.862599 |
| GO:0072659 | 0.862358 |
| GO:0051901 | 0.861670 |
| GO:0006357 | 0.861504 |
| GO:0042475 | 0.861448 |
| GO:0045747 | 0.861374 |
| GO:0072006 | 0.860598 |
| GO:0042220 | 0.860483 |
| GO:0006937 | 0.860353 |
| GO:0006511 | 0.860111 |
| GO:0010718 | 0.859229 |
| GO:0035924 | 0.859169 |
| GO:0090398 | 0.859050 |
| GO:0031532 | 0.858981 |
| GO:1904062 | 0.858745 |
| GO:2000251 | 0.858605 |
| GO:0014068 | 0.858156 |
| GO:0048146 | 0.858102 |
| GO:0051090 | 0.857756 |
| GO:0034765 | 0.857317 |
| GO:0007229 | 0.856812 |
| GO:0007158 | 0.856712 |
| GO:1901031 | 0.856712 |
| GO:0061351 | 0.856372 |
| GO:1904019 | 0.856183 |
| GO:0048812 | 0.856107 |
| GO:0060437 | 0.855565 |
| GO:0034766 | 0.854758 |
| GO:0033143 | 0.854574 |
| GO:0007269 | 0.854497 |
| GO:0032516 | 0.854484 |
| GO:0036120 | 0.854433 |
| GO:0090068 | 0.853947 |
| GO:0046854 | 0.853881 |
| GO:0010811 | 0.853842 |
| GO:0060976 | 0.853774 |
| GO:0060045 | 0.853680 |
| GO:0021549 | 0.853311 |
| GO:0043534 | 0.853142 |
| GO:0038084 | 0.853135 |
| GO:0046427 | 0.852947 |
| GO:0030324 | 0.852866 |
| GO:0048010 | 0.852488 |
| GO:0097193 | 0.852297 |
| GO:0048286 | 0.852143 |
| GO:0006468 | 0.851852 |
| GO:0060326 | 0.851772 |
| GO:0034097 | 0.851678 |
| GO:0016071 | 0.851667 |
| GO:0036324 | 0.851085 |
| GO:1903010 | 0.851085 |
| GO:0002327 | 0.850962 |
| GO:0001570 | 0.850955 |
| GO:0043536 | 0.850601 |
| GO:0043406 | 0.850494 |
| GO:0045347 | 0.850455 |
| GO:0001701 | 0.850196 |
| GO:0019222 | 0.849913 |
| GO:0051403 | 0.849741 |
| GO:0097021 | 0.849170 |
| GO:0043467 | 0.848706 |
| GO:0045766 | 0.848621 |
| GO:0060562 | 0.848060 |
| GO:0030001 | 0.847486 |
| GO:0006810 | 0.847446 |
| GO:0031667 | 0.847070 |
| GO:0048565 | 0.846000 |
| GO:0019827 | 0.845649 |
| GO:0007565 | 0.845356 |
| GO:0009966 | 0.844893 |
| GO:0055085 | 0.844768 |
| GO:0043114 | 0.844749 |
| GO:0002548 | 0.844626 |
| GO:2000377 | 0.844341 |
| GO:0030198 | 0.844187 |
| GO:0032386 | 0.844167 |
| GO:0031929 | 0.844150 |
| GO:0035306 | 0.843956 |
| GO:0006897 | 0.843955 |
| GO:0051301 | 0.843815 |
| GO:0001656 | 0.843809 |
| GO:0042060 | 0.843773 |
| GO:0031109 | 0.843563 |
| GO:0000122 | 0.843521 |
| GO:0043124 | 0.843017 |
| GO:0001837 | 0.842638 |
| GO:1902275 | 0.841719 |
| GO:0051261 | 0.841719 |
| GO:0051924 | 0.841520 |
| GO:0002250 | 0.841465 |
| GO:0030336 | 0.841059 |
| GO:0046631 | 0.840909 |
| GO:0016055 | 0.840841 |
| GO:0033077 | 0.840735 |
| GO:0048741 | 0.840370 |
| GO:0007266 | 0.839667 |
| GO:0001938 | 0.838948 |
| GO:0043586 | 0.838898 |
| GO:0008277 | 0.837920 |
| GO:0043303 | 0.837858 |
| GO:0070662 | 0.837526 |
| GO:0060374 | 0.836916 |
| GO:0045087 | 0.836889 |
| GO:0034220 | 0.836107 |
| GO:0032388 | 0.835532 |
| GO:0048568 | 0.835305 |
| GO:0050866 | 0.835227 |
| GO:0009058 | 0.834946 |
| GO:1902074 | 0.834912 |
| GO:0043244 | 0.834906 |
| GO:0008542 | 0.834749 |
| GO:0045055 | 0.834433 |
| GO:0045444 | 0.834286 |
| GO:0046578 | 0.834019 |
| GO:0046777 | 0.833773 |
| GO:0001889 | 0.833595 |
| GO:0008584 | 0.833556 |
| GO:0045840 | 0.833536 |
| GO:0002366 | 0.833530 |
| GO:0007049 | 0.833424 |
| GO:0046474 | 0.833392 |
| GO:0019233 | 0.833182 |
| GO:0000165 | 0.832917 |
| GO:0051258 | 0.832656 |
| GO:0032956 | 0.832450 |
| GO:0022612 | 0.832326 |
| GO:0051050 | 0.832281 |
| GO:0043392 | 0.831905 |
| GO:0031274 | 0.831814 |
| GO:0051702 | 0.831506 |
| GO:0010564 | 0.831039 |
| GO:0031099 | 0.830615 |
| GO:1905563 | 0.830607 |
| GO:0030318 | 0.830136 |
| GO:0048598 | 0.829861 |
| GO:0007165 | 0.829719 |
| GO:1901988 | 0.829474 |
| GO:0007186 | 0.829429 |
| GO:0033157 | 0.829023 |
| GO:0019221 | 0.829000 |
| GO:0000278 | 0.828800 |
| GO:0042310 | 0.828784 |
| GO:1901300 | 0.828616 |
| GO:0006909 | 0.828497 |
| GO:0030154 | 0.828332 |
| GO:0002573 | 0.827001 |
| GO:0045429 | 0.826889 |
| GO:0051223 | 0.826823 |
| GO:0016570 | 0.826822 |
| GO:0030163 | 0.826442 |
| GO:0009791 | 0.826355 |
| GO:0090630 | 0.826069 |
| GO:0032409 | 0.825426 |
| GO:0048477 | 0.824868 |
| GO:0034644 | 0.824849 |
| GO:0007346 | 0.824841 |
| GO:0046651 | 0.824539 |
| GO:0051171 | 0.823977 |
| GO:0000302 | 0.823816 |
| GO:0048608 | 0.823637 |
| GO:0032940 | 0.823481 |
| GO:0008610 | 0.823469 |
| GO:0010628 | 0.823151 |
| GO:1903078 | 0.822244 |
| GO:0016032 | 0.821730 |
| GO:0009888 | 0.821458 |
| GO:0016042 | 0.821320 |
| GO:0007259 | 0.820971 |
| GO:0008544 | 0.820813 |
| GO:0000077 | 0.820719 |
| GO:0021766 | 0.820586 |
| GO:0001817 | 0.819733 |
| GO:0001932 | 0.819683 |
| GO:0002053 | 0.819493 |
| GO:0072593 | 0.819390 |
| GO:0009887 | 0.819242 |
| GO:0006753 | 0.818971 |
| GO:0071383 | 0.818684 |
| GO:0007015 | 0.818627 |
| GO:0001819 | 0.818452 |
| GO:0007275 | 0.818394 |
| GO:1903829 | 0.818083 |
| GO:0002244 | 0.818060 |
| GO:0051898 | 0.817795 |
| GO:0009410 | 0.817265 |
| GO:0030335 | 0.817025 |
| GO:0061024 | 0.816492 |
| GO:0007173 | 0.816349 |
| GO:0050900 | 0.816242 |
| GO:0060395 | 0.815909 |
| GO:0009755 | 0.815667 |
| GO:0045860 | 0.815613 |
| GO:0050872 | 0.815367 |
| GO:0007612 | 0.814548 |
| GO:0000082 | 0.814519 |
| GO:0050852 | 0.814267 |
| GO:0043408 | 0.813977 |
| GO:0002009 | 0.813874 |
| GO:0019752 | 0.813530 |
| GO:0001822 | 0.813506 |
| GO:0007179 | 0.813500 |
| GO:0051049 | 0.813439 |
| GO:0010033 | 0.813421 |
| GO:1901135 | 0.813379 |
| GO:1900180 | 0.813213 |
| GO:0033554 | 0.813172 |
| GO:0007204 | 0.813136 |
| GO:0044770 | 0.812960 |
| GO:0001755 | 0.812831 |
| GO:0001541 | 0.812614 |
| GO:0006470 | 0.811795 |
| GO:0009743 | 0.811594 |
| GO:0033993 | 0.811585 |
| GO:0035265 | 0.811041 |
| GO:0051496 | 0.811040 |
| GO:0007162 | 0.810927 |
| GO:0030218 | 0.809955 |
| GO:0006139 | 0.809816 |
| GO:0070374 | 0.808642 |
| GO:0006298 | 0.808612 |
| GO:0009056 | 0.808581 |
| GO:0070507 | 0.808431 |
| GO:0071363 | 0.808295 |
| GO:0050680 | 0.808234 |
| GO:0007169 | 0.807939 |
| GO:0001894 | 0.807870 |
| GO:0000902 | 0.806862 |
| GO:0009617 | 0.806711 |
| GO:1902904 | 0.806512 |
| GO:0030097 | 0.806125 |
| GO:0007399 | 0.805949 |
| GO:0050853 | 0.805230 |
| GO:0051726 | 0.804914 |
| GO:0008360 | 0.804780 |
| GO:0050863 | 0.804772 |
| GO:0010629 | 0.804702 |
| GO:0032880 | 0.804305 |
| GO:0021795 | 0.804198 |
| GO:0046488 | 0.804184 |
| GO:0031032 | 0.804004 |
| GO:0045595 | 0.803077 |
| GO:0006936 | 0.802344 |
| GO:0045793 | 0.802149 |
| GO:0071222 | 0.801980 |
| GO:0051897 | 0.801416 |
| GO:0006606 | 0.800953 |
| GO:0006886 | 0.800872 |
| GO:0030307 | 0.800490 |
| GO:0048738 | 0.800331 |
| GO:0010821 | 0.800220 |
| GO:0051247 | 0.800154 |
| GO:0042752 | 0.800120 |
| GO:0032835 | 0.800025 |
| GO:0033138 | 0.799982 |
| GO:1903578 | 0.799701 |
| GO:0050673 | 0.798946 |
| GO:0006997 | 0.798672 |
| GO:0060341 | 0.798662 |
| GO:0006281 | 0.798556 |
| GO:0042391 | 0.798475 |
| GO:0050808 | 0.797394 |
| GO:0007267 | 0.797360 |
| GO:0050865 | 0.797107 |
| GO:0018105 | 0.797070 |
| GO:0060560 | 0.796569 |
| GO:0071478 | 0.796131 |
| GO:0018107 | 0.796045 |
| GO:0019216 | 0.795977 |
| GO:0023061 | 0.795969 |
| GO:0036473 | 0.795897 |
| GO:0051147 | 0.795455 |
| GO:0006996 | 0.794900 |
| GO:0030217 | 0.794761 |
| GO:0070527 | 0.794579 |
| GO:0050804 | 0.793936 |
| GO:0060021 | 0.793808 |
| GO:0045321 | 0.793792 |
| GO:0046034 | 0.792891 |
| GO:1904646 | 0.792812 |
| GO:0030182 | 0.792624 |
| GO:0002764 | 0.790893 |
| GO:0007596 | 0.790844 |
| GO:0043542 | 0.790474 |
| GO:0006355 | 0.790400 |
| GO:0010638 | 0.790227 |
| GO:0042110 | 0.789916 |
| GO:2000811 | 0.789519 |
| GO:0045785 | 0.789271 |
| GO:0001952 | 0.789204 |
| GO:0048709 | 0.787833 |
| GO:0016192 | 0.787802 |
| GO:0002320 | 0.787705 |
| GO:0045944 | 0.787650 |
| GO:0035051 | 0.787216 |
| GO:0070663 | 0.786907 |
| GO:0046486 | 0.786765 |
| GO:0006914 | 0.786701 |
| GO:0071407 | 0.786480 |
| GO:0048468 | 0.786471 |
| GO:0043065 | 0.786229 |
| GO:1902532 | 0.786009 |
| GO:0033044 | 0.785934 |
| GO:0031333 | 0.785379 |
| GO:0071417 | 0.785307 |
| GO:0016241 | 0.785238 |
| GO:0007268 | 0.785105 |
| GO:0007010 | 0.785047 |
| GO:0002443 | 0.783904 |
| GO:2000270 | 0.783308 |
| GO:0001764 | 0.782709 |
| GO:0051174 | 0.781935 |
| GO:0034329 | 0.781439 |
| GO:0043549 | 0.781269 |
| GO:0010595 | 0.781136 |
| GO:2001020 | 0.780899 |
| GO:0050776 | 0.780250 |
| GO:0007159 | 0.780220 |
| GO:0048041 | 0.780105 |
| GO:0016236 | 0.779569 |
| GO:0048638 | 0.778556 |
| GO:0042551 | 0.778521 |
| GO:0007517 | 0.778474 |
| GO:0032869 | 0.777921 |
| GO:0051649 | 0.777222 |
| GO:0009725 | 0.777056 |
| GO:0030855 | 0.776398 |
| GO:0002040 | 0.776347 |
| GO:0071310 | 0.775759 |
| GO:0042063 | 0.775499 |
| GO:0009266 | 0.775262 |
| GO:0048469 | 0.774721 |
| GO:0042307 | 0.774054 |
| GO:0032879 | 0.772742 |
| GO:0002376 | 0.772696 |
| GO:0055082 | 0.772549 |
| GO:0016070 | 0.772150 |
| GO:0060840 | 0.771853 |
| GO:0010632 | 0.771656 |
| GO:0007219 | 0.771429 |
| GO:0051341 | 0.770833 |
| GO:0060416 | 0.770267 |
| GO:0090050 | 0.770256 |
| GO:0002274 | 0.770035 |
| GO:0009968 | 0.768538 |
| GO:0009416 | 0.768293 |
| GO:0009653 | 0.767978 |
| GO:0030183 | 0.767941 |
| GO:0007507 | 0.766819 |
| GO:0007283 | 0.766625 |
| GO:0048589 | 0.766590 |
| GO:0050790 | 0.766284 |
| GO:0065003 | 0.765562 |
| GO:0030032 | 0.765559 |
| GO:0048103 | 0.765258 |
| GO:0006954 | 0.764565 |
| GO:0048878 | 0.764329 |
| GO:0007420 | 0.764092 |
| GO:0030168 | 0.762921 |
| GO:0006629 | 0.761422 |
| GO:0006644 | 0.760398 |
| GO:0001525 | 0.760172 |
| GO:0120035 | 0.759979 |
| GO:0034103 | 0.759958 |
| GO:0014070 | 0.759563 |
| GO:0044255 | 0.758471 |
| GO:0051098 | 0.758377 |
| GO:0051641 | 0.757853 |
| GO:0034599 | 0.756607 |
| GO:0043473 | 0.756079 |
| GO:0036092 | 0.755500 |
| GO:0048863 | 0.755435 |
| GO:2000352 | 0.754950 |
| GO:0030162 | 0.754327 |
| GO:0042325 | 0.754119 |
| GO:0008202 | 0.754059 |
| GO:0033628 | 0.753988 |
| GO:0051146 | 0.753713 |
| GO:0010243 | 0.753077 |
| GO:0043524 | 0.752485 |
| GO:0003014 | 0.752381 |
| GO:0002684 | 0.752271 |
| GO:0001763 | 0.751863 |
| GO:0051145 | 0.751530 |
| GO:0045596 | 0.750733 |
| GO:0000226 | 0.750411 |
| GO:0031175 | 0.749603 |
| GO:0007155 | 0.749178 |
| GO:0002064 | 0.748667 |
| GO:0045597 | 0.748593 |
| GO:0040008 | 0.748313 |
| GO:0060485 | 0.746706 |
| GO:0006508 | 0.746456 |
| GO:0097191 | 0.746084 |
| GO:0016477 | 0.745481 |
| GO:0005975 | 0.745307 |
| GO:0043066 | 0.745136 |
| GO:0050890 | 0.744664 |
| GO:0007265 | 0.744598 |
| GO:0032092 | 0.743751 |
| GO:0051017 | 0.743352 |
| GO:0007005 | 0.742652 |
| GO:0043434 | 0.742583 |
| GO:0003158 | 0.742221 |
| GO:0042113 | 0.742005 |
| GO:0120162 | 0.741508 |
| GO:0051881 | 0.740169 |
| GO:0030522 | 0.740000 |
| GO:0007160 | 0.739980 |
| GO:0048511 | 0.737766 |
| GO:0044281 | 0.736585 |
| GO:0007568 | 0.736500 |
| GO:0007610 | 0.735604 |
| GO:0035556 | 0.734890 |
| GO:0048017 | 0.734281 |
| GO:0006325 | 0.734050 |
| GO:0006915 | 0.733434 |
| GO:0008284 | 0.731624 |
| GO:0045165 | 0.731183 |
| GO:0002682 | 0.730932 |
| GO:0022414 | 0.730731 |
| GO:0097190 | 0.730349 |
| GO:0051494 | 0.729426 |
| GO:0051128 | 0.728822 |
| GO:0043254 | 0.728497 |
| GO:0098609 | 0.727096 |
| GO:0006338 | 0.726229 |
| GO:0007423 | 0.725167 |
| GO:0001649 | 0.721908 |
| GO:0048661 | 0.721041 |
| GO:0010941 | 0.719507 |
| GO:1900407 | 0.716777 |
| GO:0007166 | 0.714859 |
| GO:0002683 | 0.708778 |
| GO:1902903 | 0.708773 |
| GO:0008285 | 0.705969 |
| GO:0030900 | 0.704312 |
| GO:0034504 | 0.704188 |
| GO:0033365 | 0.703782 |
| GO:0070997 | 0.703482 |
| GO:0033043 | 0.697664 |
| GO:0051249 | 0.687925 |
| GO:0008104 | 0.675076 |
| GO:0033002 | 0.627125 |
| GO:0042593 | 0.364293 |
| GO:0071887 | 0.359703 |
| GO:0044262 | 0.349744 |
| GO:0051640 | 0.348052 |
| GO:0051000 | 0.299934 |
| GO:0050778 | 0.290903 |
| GO:0007156 | 0.278928 |
| GO:0008361 | 0.265902 |
| GO:0070301 | 0.262988 |
| GO:0022407 | 0.258553 |
| GO:0015031 | 0.257560 |
| GO:0043525 | 0.248848 |
| GO:0051353 | 0.242570 |
| GO:0043086 | 0.239824 |
| GO:0045471 | 0.229314 |
| GO:0051497 | 0.208992 |
| GO:0031529 | 0.208648 |
| GO:0099504 | 0.206822 |
| GO:0043547 | 0.194217 |
| GO:1904659 | 0.191457 |
| GO:0031334 | 0.184335 |
| GO:0046677 | 0.183479 |
| GO:0015980 | 0.180476 |
| GO:0060291 | 0.175234 |
| GO:0009259 | 0.166290 |
| GO:0060173 | 0.162212 |
| GO:0042632 | 0.145299 |
| GO:0046890 | 0.144186 |
| GO:0032760 | 0.142722 |
| GO:0051302 | 0.135000 |
| GO:0031295 | 0.134696 |
| GO:0019318 | 0.123006 |
| GO:0010951 | 0.120040 |
| GO:0021987 | 0.119137 |
| GO:0006163 | 0.118024 |
| GO:0030041 | 0.107955 |
| GO:0001892 | 0.106324 |
| GO:0030512 | 0.105991 |
| GO:0060079 | 0.105991 |
| GO:0050770 | 0.098547 |
| GO:0051928 | 0.097553 |
| GO:0031397 | 0.094042 |
| GO:0060041 | 0.082956 |
| GO:0051047 | 0.076258 |
| GO:0019722 | 0.041730 |
| GO:0090042 | 0.035699 |
# Histogram of the values in GO_terms_auc_svm_df_final. Bars whose left bin
# edge is above 0.7 are recolored, and the title shows the percentage of rows
# whose "auc" is above 0.7.
sns.set(rc={'figure.figsize': (6, 4)})

# Percentage of rows with auc > 0.7, rounded to two decimals, e.g. "94.68%".
n_above_threshold = len(
    GO_terms_auc_svm_df_final[GO_terms_auc_svm_df_final["auc"] > 0.7]
)
perc = f"{round(100 * n_above_threshold / len(GO_terms_auc_svm_df_final), 2)}%"

N, bins, patches = plt.hist(
    GO_terms_auc_svm_df_final,
    color=CB_color_cycle[6],
    bins=50,
    linewidth=0.1,
)

# `bins` holds one more edge than there are bars, so zip() pairs every bar
# with its left edge and ignores the final (right-most) edge.
for left_edge, bar in zip(bins, patches):
    if left_edge > 0.7:
        bar.set_facecolor(CB_color_cycle[2])

plt.xlabel("AUC (logistic 1)", fontsize=16)
plt.title(perc, fontsize=16)
# The variant that works best is the plain sum of the attributions.
# Output: Text(0.5, 1.0, '94.68%')

Final model AUPR
# Turn the (GO term -> AUPR) items of GO_terms_aupr_svm_final into a
# one-column table indexed by "goterm", then drop the rows with missing values.
GO_terms_aupr_svm_df_final = (
    pd.DataFrame(
        list(GO_terms_aupr_svm_final.items()),
        columns=['goterm', 'aupr'],
    )
    .set_index("goterm")
    .dropna()
)
GO_terms_aupr_svm_df_final.sort_values(by=["aupr"], ascending=False).head()

| | aupr |
|---|---|
| goterm | |
| GO:0036289 | 1.000000 |
| GO:0050896 | 0.995438 |
| GO:0043170 | 0.989680 |
| GO:0006807 | 0.987396 |
| GO:0060440 | 0.978213 |
# NOTE: I have a problem with the recall.
# Histogram of the values in GO_terms_aupr_svm_df_final. Bars whose left bin
# edge is above 0.7 are recolored, and the title shows the percentage of rows
# whose "aupr" is above 0.7.
sns.set(rc={'figure.figsize': (5, 3)})

# Percentage of rows with aupr > 0.7, rounded to two decimals, e.g. "20.02%".
n_above_threshold = len(
    GO_terms_aupr_svm_df_final[GO_terms_aupr_svm_df_final["aupr"] > 0.7]
)
perc = f"{round(100 * n_above_threshold / len(GO_terms_aupr_svm_df_final), 2)}%"

N, bins, patches = plt.hist(
    GO_terms_aupr_svm_df_final,
    color=CB_color_cycle[6],
    bins=50,
    linewidth=0.1,
)

# `bins` holds one more edge than there are bars, so zip() pairs every bar
# with its left edge and ignores the final (right-most) edge.
for left_edge, bar in zip(bins, patches):
    if left_edge > 0.7:
        bar.set_facecolor(CB_color_cycle[3])

plt.xlabel("AUPR", fontsize=16)
plt.title(perc, fontsize=16)
# Output: Text(0.5, 1.0, '20.02%')

Predict for a new drug
Make predictions
# Columns present in attribution_data_all but absent from
# attribution_data_annotated; these are the drugs predictions are made for.
unknown = list(set(attribution_data_all.columns)-set(attribution_data_annotated.columns))

# Get the probabilities for all unknown drugs
#
# For every drug in `unknown` and every GO-term model in `models_svm`:
#   1. take the drug's attributions for the features the model was fitted on,
#   2. divide them by the per-neuron std computed on attribution_data_annotated
#      (NaNs produced by the division are replaced with 0),
#   3. store the predicted class and the class-1 probability.
# Results:
#   probabilities_unknown -- rows = GO terms, columns = drugs, class-1 probability
#   preds_unknown         -- rows = GO terms, columns = drugs, predicted class

# The feature list and the std vector depend only on the GO term, not on the
# drug, so they are computed once here instead of once per (drug, GO term).
goterm_nodes = {}
goterm_stds = {}
for goterm, model in models_svm.items():
    # Feature names stored in the fitted model (those are the attributions we need).
    goterm_nodes[goterm] = list(model.feature_names_in_)
    # Std of each neuron, using only the drugs that trained the models.
    goterm_stds[goterm] = attribution_data_annotated.loc[goterm_nodes[goterm]].T.std()

predictions = {}
distances = {}
# One single-column frame per drug; concatenated once after the loop so the
# accumulated result is not re-copied on every iteration.
drug_prob_frames = []
drug_pred_frames = []
for drug in unknown:
    probabilities = {}
    for goterm, model in models_svm.items():
        # 1-row frame: index = [drug], columns = the model's feature names.
        score = attribution_data_all.loc[goterm_nodes[goterm], drug].to_frame().T
        score_mod = score.divide(goterm_stds[goterm]).fillna(0)
        predictions[goterm] = model.predict(score_mod)
        probabilities[goterm] = model.predict_proba(score_mod)[:, 1]  # platt values
        # distances[goterm] = model.decision_function(score_mod)
    drug_probs = pd.DataFrame.from_dict(probabilities).T
    drug_probs.columns = [drug]
    drug_preds = pd.DataFrame.from_dict(predictions).T
    drug_preds.columns = [drug]
    drug_prob_frames.append(drug_probs)
    drug_pred_frames.append(drug_preds)
    print(drug)

# pd.concat raises on an empty list, so fall back to an empty frame when
# there is no unknown drug (same result the incremental version produced).
probabilities_unknown = (
    pd.concat(drug_prob_frames, axis=1) if drug_prob_frames else pd.DataFrame()
)
preds_unknown = (
    pd.concat(drug_pred_frames, axis=1) if drug_pred_frames else pd.DataFrame()
)

# Study drug with unknown MOA
Choose drug with unknown MOA…
# Widget for choosing one of the drugs in `unknown`. `f` is the callback passed
# to `interactive`; it is not defined in this section of the file.
combobox_u = interactive(f, drug=widgets.Combobox(options=unknown))

# Row labels of platt_matrix with the "_1" suffix appended.
predictions_nodes = [goterm + "_" + str(1) for goterm in list(platt_matrix.index)]

# add names to go terms
# .copy() makes the filtered table independent of real_go_info, so the column
# assignment below does not trigger pandas' SettingWithCopyWarning.
real_go_info_svm = real_go_info[real_go_info.GO_term.isin(predictions_nodes)].copy()
# "_1" is a literal, not a pattern: regex=False keeps that explicit and
# independent of the pandas version's default.
real_go_info_svm["GO_term"] = real_go_info_svm["GO_term"].str.replace("_1", "", regex=False)

display(combobox_u)

# NOTE(review): `.result` is read right after the widget is displayed; in the
# notebook this is presumably a separate cell run after a drug has been
# chosen -- confirm before running this as a plain script.
selected_drug_u_name = combobox_u.result

# Predictions of the selected drug, one row per GO term.
predictions_df = pd.DataFrame.from_dict(preds_unknown.loc[:, selected_drug_u_name]).reset_index()
predictions_df.columns = ["GO_term", "predictions"]

# Probabilities of the selected drug, joined with the GO-term information and
# with the predictions on the shared "GO_term" column.
probabilities_df = pd.DataFrame.from_dict(probabilities_unknown.loc[:, selected_drug_u_name]).reset_index()
probabilities_df.columns = ["GO_term", "probability"]
probabilities_df = probabilities_df.merge(real_go_info_svm, on="GO_term")
probabilities_df = probabilities_df.merge(predictions_df, on="GO_term")
probabilities_df.loc[probabilities_df["layer_number"] <=3].sort_values(by=["probability"], ascending=False).head(200)

| | GO_term | probability | Name | layer_number | predictions |
|---|---|---|---|---|---|
| 820 | GO:0033993 | 0.809556 | Response to lipid (1) | 3.0 | 1.0 |
| 223 | GO:0018108 | 0.783375 | Peptidyl-tyrosine phosphorylation (1) | 3.0 | 1.0 |
| 573 | GO:0010629 | 0.742615 | Negative regulation of gene expression (1) | 3.0 | 1.0 |
| 106 | GO:0071900 | 0.725062 | Regulation of protein serine/threonine kinase activity (1) | 2.0 | 1.0 |
| 624 | GO:0010628 | 0.702924 | Positive regulation of gene expression (1) | 3.0 | 1.0 |
| 74 | GO:0001817 | 0.687600 | Regulation of cytokine production (1) | 3.0 | 1.0 |
| 44 | GO:0048812 | 0.672129 | Neuron projection morphogenesis (1) | 3.0 | 1.0 |
| 224 | GO:0046777 | 0.661031 | Protein autophosphorylation (1) | 1.0 | 1.0 |
| 99 | GO:0001934 | 0.658374 | Positive regulation of protein phosphorylation (1) | 3.0 | 1.0 |
| 570 | GO:0045597 | 0.628072 | Positive regulation of cell differentiation (1) | 3.0 | 1.0 |
| 839 | GO:0031047 | 0.553888 | Gene silencing by rna (1) | 2.0 | 1.0 |
| 100 | GO:0033138 | 0.542770 | Positive regulation of peptidyl-serine phosphorylation (1) | 1.0 | 1.0 |
| 888 | GO:0034976 | 0.540483 | Response to endoplasmic reticulum stress (1) | 3.0 | 1.0 |
| 633 | GO:0051301 | 0.535293 | Cell division (1) | 2.0 | 1.0 |
| 821 | GO:0034097 | 0.533600 | Response to cytokine (1) | 3.0 | 1.0 |
| 423 | GO:1902533 | 0.530199 | Positive regulation of intracellular signal transduction (1) | 2.0 | 1.0 |
| 596 | GO:0060341 | 0.523390 | Regulation of cellular localization (1) | 3.0 | 0.0 |
| 729 | GO:0120035 | 0.514954 | Regulation of plasma membrane bounded cell projection organization (1) | 3.0 | 1.0 |
| 568 | GO:0008284 | 0.513916 | Positive regulation of cell population proliferation (1) | 2.0 | 1.0 |
| 558 | GO:0016032 | 0.500000 | Viral process (1) | 3.0 | 1.0 |
| 641 | GO:0071417 | 0.494543 | Cellular response to organonitrogen compound (1) | 3.0 | 1.0 |
| 353 | GO:0006954 | 0.482633 | Inflammatory response (1) | 3.0 | 1.0 |
| 9 | GO:0043408 | 0.476159 | Regulation of mapk cascade (1) | 2.0 | 1.0 |
| 808 | GO:1902532 | 0.449154 | Negative regulation of intracellular signal transduction (1) | 3.0 | 1.0 |
| 11 | GO:0043406 | 0.445845 | Positive regulation of map kinase activity (1) | 1.0 | 1.0 |
| 8 | GO:0000165 | 0.443783 | Mapk cascade (1) | 3.0 | 0.0 |
| 358 | GO:0007005 | 0.427023 | Mitochondrion organization (1) | 3.0 | 1.0 |
| 134 | GO:0002366 | 0.423235 | Leukocyte activation involved in immune response (1) | 3.0 | 1.0 |
| 221 | GO:0018105 | 0.422261 | Peptidyl-serine phosphorylation (1) | 2.0 | 0.0 |
| 285 | GO:0051051 | 0.421500 | Negative regulation of transport (1) | 3.0 | 1.0 |
| 191 | GO:0045944 | 0.420524 | Positive regulation of transcription by rna polymerase ii (1) | 2.0 | 1.0 |
| 654 | GO:0090398 | 0.416328 | Cellular senescence (1) | 1.0 | 1.0 |
| 847 | GO:0045055 | 0.414665 | Regulated exocytosis (1) | 2.0 | 1.0 |
| 342 | GO:2001243 | 0.411574 | Negative regulation of intrinsic apoptotic signaling pathway (1) | 2.0 | 1.0 |
| 510 | GO:0042063 | 0.408279 | Gliogenesis (1) | 3.0 | 1.0 |
| 788 | GO:0009410 | 0.407937 | Response to xenobiotic stimulus (1) | 2.0 | 1.0 |
| 824 | GO:0071363 | 0.407544 | Cellular response to growth factor stimulus (1) | 3.0 | 1.0 |
| 496 | GO:0048608 | 0.406699 | Reproductive structure development (1) | 2.0 | 1.0 |
| 528 | GO:0007565 | 0.406022 | Female pregnancy (1) | 2.0 | 1.0 |
| 896 | GO:0097193 | 0.400210 | Intrinsic apoptotic signaling pathway (1) | 3.0 | 1.0 |
| 76 | GO:0001819 | 0.388417 | Positive regulation of cytokine production (1) | 2.0 | 1.0 |
| 48 | GO:0001525 | 0.386728 | Angiogenesis (1) | 2.0 | 1.0 |
| 300 | GO:0032386 | 0.384834 | Regulation of intracellular transport (1) | 2.0 | 1.0 |
| 906 | GO:0043549 | 0.378711 | Regulation of kinase activity (1) | 3.0 | 0.0 |
| 662 | GO:0031648 | 0.377411 | Protein destabilization (1) | 0.0 | 1.0 |
| 516 | GO:0007423 | 0.372363 | Sensory organ development (1) | 3.0 | 1.0 |
| 461 | GO:0050804 | 0.367884 | Modulation of chemical synaptic transmission (1) | 3.0 | 1.0 |
| 104 | GO:0006469 | 0.364467 | Negative regulation of protein kinase activity (1) | 2.0 | 1.0 |
| 620 | GO:0051098 | 0.363823 | Regulation of binding (1) | 3.0 | 1.0 |
| 86 | GO:0072006 | 0.358531 | Nephron development (1) | 2.0 | 1.0 |
| 473 | GO:0008584 | 0.350294 | Male gonad development (1) | 1.0 | 1.0 |
| 536 | GO:0007610 | 0.346612 | Behavior (1) | 3.0 | 0.0 |
| 693 | GO:1904646 | 0.344997 | Cellular response to amyloid-beta (1) | 0.0 | 1.0 |
| 376 | GO:0007015 | 0.344878 | Actin filament organization (1) | 3.0 | 1.0 |
| 552 | GO:0033365 | 0.343027 | Protein localization to organelle (1) | 3.0 | 1.0 |
| 774 | GO:0030216 | 0.342203 | Keratinocyte differentiation (1) | 2.0 | 1.0 |
| 639 | GO:0060326 | 0.341919 | Cell chemotaxis (1) | 2.0 | 1.0 |
| 154 | GO:0050778 | 0.335649 | Positive regulation of immune response (1) | 3.0 | 0.0 |
| 480 | GO:0048565 | 0.333289 | Digestive tract development (1) | 1.0 | 1.0 |
| 794 | GO:0043434 | 0.332497 | Response to peptide hormone (1) | 3.0 | 1.0 |
| 651 | GO:0050808 | 0.322913 | Synapse organization (1) | 3.0 | 1.0 |
| 772 | GO:0060485 | 0.320964 | Mesenchyme development (1) | 3.0 | 1.0 |
| 406 | GO:0048041 | 0.319689 | Focal adhesion assembly (1) | 1.0 | 1.0 |
| 698 | GO:0070663 | 0.318299 | Regulation of leukocyte proliferation (1) | 2.0 | 1.0 |
| 804 | GO:0030855 | 0.317904 | Epithelial cell differentiation (1) | 3.0 | 0.0 |
| 538 | GO:0048266 | 0.308838 | Behavioral response to pain (1) | 0.0 | 1.0 |
| 225 | GO:0006470 | 0.305848 | Protein dephosphorylation (1) | 3.0 | 1.0 |
| 719 | GO:0043244 | 0.305739 | Regulation of protein-containing complex disassembly (1) | 2.0 | 1.0 |
| 725 | GO:0070997 | 0.305130 | Neuron death (1) | 2.0 | 0.0 |
| 532 | GO:0007596 | 0.304871 | Blood coagulation (1) | 3.0 | 1.0 |
| 806 | GO:0051056 | 0.302828 | Regulation of small gtpase mediated signal transduction (1) | 3.0 | 1.0 |
| 433 | GO:0097191 | 0.302625 | Extrinsic apoptotic signaling pathway (1) | 3.0 | 1.0 |
| 574 | GO:0008285 | 0.300631 | Negative regulation of cell population proliferation (1) | 3.0 | 0.0 |
| 49 | GO:0001569 | 0.300115 | Branching involved in blood vessel morphogenesis (1) | 0.0 | 1.0 |
| 926 | GO:0060020 | 0.297104 | Bergmann glial cell differentiation (1) | 0.0 | 1.0 |
| 789 | GO:0009416 | 0.295539 | Response to light stimulus (1) | 2.0 | 0.0 |
| 887 | GO:0034504 | 0.294393 | Protein localization to nucleus (1) | 2.0 | 1.0 |
| 169 | GO:0002764 | 0.292841 | Immune response-regulating signaling pathway (1) | 3.0 | 0.0 |
| 890 | GO:0071353 | 0.290912 | Cellular response to interleukin-4 (1) | 1.0 | 1.0 |
| 505 | GO:0007283 | 0.290337 | Spermatogenesis (1) | 2.0 | 1.0 |
| 513 | GO:0030900 | 0.289030 | Forebrain development (1) | 3.0 | 1.0 |
| 241 | GO:0006612 | 0.288796 | Protein targeting to membrane (1) | 1.0 | 1.0 |
| 644 | GO:0071230 | 0.284292 | Cellular response to amino acid stimulus (1) | 1.0 | 1.0 |
| 712 | GO:0033002 | 0.280632 | Muscle cell proliferation (1) | 2.0 | 1.0 |
| 317 | GO:0006897 | 0.277768 | Endocytosis (1) | 3.0 | 1.0 |
| 230 | GO:0030162 | 0.277751 | Regulation of proteolysis (1) | 3.0 | 0.0 |
| 607 | GO:0042391 | 0.276380 | Regulation of membrane potential (1) | 3.0 | 0.0 |
| 689 | GO:1905897 | 0.275961 | Regulation of response to endoplasmic reticulum stress (1) | 2.0 | 1.0 |
| 105 | GO:0045860 | 0.275729 | Positive regulation of protein kinase activity (1) | 2.0 | 0.0 |
| 198 | GO:0006260 | 0.275261 | Dna replication (1) | 3.0 | 1.0 |
| 548 | GO:1903829 | 0.274893 | Positive regulation of protein localization (1) | 3.0 | 0.0 |
| 934 | GO:0051258 | 0.274092 | Protein polymerization (1) | 3.0 | 1.0 |
| 378 | GO:0031532 | 0.271109 | Actin cytoskeleton reorganization (1) | 1.0 | 1.0 |
| 216 | GO:0045727 | 0.270882 | Positive regulation of translation (1) | 1.0 | 1.0 |
| 642 | GO:0034599 | 0.270088 | Cellular response to oxidative stress (1) | 3.0 | 0.0 |
| 767 | GO:0051146 | 0.266722 | Striated muscle cell differentiation (1) | 2.0 | 0.0 |
| 77 | GO:0002718 | 0.262195 | Regulation of cytokine production involved in immune response (1) | 2.0 | 1.0 |
| 19 | GO:0031109 | 0.261030 | Microtubule polymerization or depolymerization (1) | 2.0 | 1.0 |
| 584 | GO:0040008 | 0.259597 | Regulation of growth (1) | 3.0 | 0.0 |
| 937 | GO:0051640 | 0.257672 | Organelle localization (1) | 3.0 | 0.0 |
| 377 | GO:0031032 | 0.257340 | Actomyosin structure organization (1) | 2.0 | 1.0 |
| 904 | GO:0042113 | 0.254618 | B cell activation (1) | 3.0 | 0.0 |
| 133 | GO:0043303 | 0.253768 | Mast cell degranulation (1) | 1.0 | 1.0 |
| 561 | GO:0048511 | 0.251652 | Rhythmic process (1) | 3.0 | 1.0 |
| 243 | GO:0006606 | 0.250144 | Protein import into nucleus (1) | 1.0 | 1.0 |
| 836 | GO:1901987 | 0.249107 | Regulation of cell cycle phase transition (1) | 3.0 | 0.0 |
| 758 | GO:0031099 | 0.247922 | Regeneration (1) | 2.0 | 1.0 |
| 739 | GO:1902903 | 0.245529 | Regulation of supramolecular fiber organization (1) | 3.0 | 1.0 |
| 323 | GO:0016236 | 0.245469 | Macroautophagy (1) | 3.0 | 1.0 |
| 478 | GO:0048568 | 0.244894 | Embryonic organ development (1) | 3.0 | 0.0 |
| 103 | GO:0042531 | 0.243583 | Positive regulation of tyrosine phosphorylation of stat protein (1) | 0.0 | 1.0 |
| 598 | GO:0043254 | 0.240098 | Regulation of protein-containing complex assembly (1) | 3.0 | 1.0 |
| 864 | GO:0030183 | 0.236405 | B cell differentiation (1) | 1.0 | 0.0 |
| 301 | GO:0032388 | 0.235842 | Positive regulation of intracellular transport (1) | 1.0 | 0.0 |
| 692 | GO:0010595 | 0.235403 | Positive regulation of endothelial cell migration (1) | 2.0 | 1.0 |
| 865 | GO:0030217 | 0.234830 | T cell differentiation (1) | 3.0 | 0.0 |
| 20 | GO:0070507 | 0.234661 | Regulation of microtubule cytoskeleton organization (1) | 2.0 | 1.0 |
| 111 | GO:0031069 | 0.234249 | Hair follicle morphogenesis (1) | 0.0 | 1.0 |
| 363 | GO:0051494 | 0.232679 | Negative regulation of cytoskeleton organization (1) | 2.0 | 1.0 |
| 226 | GO:0035304 | 0.232643 | Regulation of protein dephosphorylation (1) | 2.0 | 1.0 |
| 913 | GO:0090630 | 0.231968 | Activation of gtpase activity (1) | 0.0 | 1.0 |
| 733 | GO:0030335 | 0.231192 | Positive regulation of cell migration (1) | 3.0 | 0.0 |
| 196 | GO:0006357 | 0.230696 | Regulation of transcription by rna polymerase ii (1) | 3.0 | 1.0 |
| 524 | GO:0007519 | 0.230417 | Skeletal muscle tissue development (1) | 2.0 | 1.0 |
| 53 | GO:0001570 | 0.230416 | Vasculogenesis (1) | 1.0 | 1.0 |
| 26 | GO:1901990 | 0.229708 | Regulation of mitotic cell cycle phase transition (1) | 2.0 | 0.0 |
| 33 | GO:0000423 | 0.229518 | Mitophagy (1) | 1.0 | 1.0 |
| 615 | GO:0035265 | 0.227576 | Organ growth (1) | 2.0 | 0.0 |
| 84 | GO:0001822 | 0.227091 | Kidney development (1) | 3.0 | 0.0 |
| 151 | GO:0006959 | 0.225795 | Humoral immune response (1) | 2.0 | 1.0 |
| 244 | GO:0042307 | 0.225164 | Positive regulation of protein import into nucleus (1) | 0.0 | 1.0 |
| 24 | GO:0007346 | 0.224934 | Regulation of mitotic cell cycle (1) | 3.0 | 0.0 |
| 162 | GO:0060374 | 0.223666 | Mast cell differentiation (1) | 0.0 | 1.0 |
| 533 | GO:0030168 | 0.219923 | Platelet activation (1) | 2.0 | 1.0 |
| 152 | GO:0045087 | 0.218376 | Innate immune response (1) | 3.0 | 0.0 |
| 523 | GO:0007517 | 0.217930 | Muscle organ development (1) | 3.0 | 0.0 |
| 138 | GO:0002683 | 0.217495 | Negative regulation of immune system process (1) | 3.0 | 0.0 |
| 319 | GO:0006909 | 0.217477 | Phagocytosis (1) | 2.0 | 1.0 |
| 18 | GO:0000226 | 0.216530 | Microtubule cytoskeleton organization (1) | 3.0 | 0.0 |
| 622 | GO:0043086 | 0.212387 | Negative regulation of catalytic activity (1) | 3.0 | 1.0 |
| 898 | GO:0035924 | 0.211788 | Cellular response to vascular endothelial growth factor stimulus (1) | 2.0 | 1.0 |
| 187 | GO:0071897 | 0.211315 | Dna biosynthetic process (1) | 2.0 | 0.0 |
| 517 | GO:0043586 | 0.209986 | Tongue development (1) | 1.0 | 1.0 |
| 606 | GO:0048638 | 0.209981 | Regulation of developmental growth (1) | 2.0 | 0.0 |
| 736 | GO:0009617 | 0.209127 | Response to bacterium (1) | 3.0 | 0.0 |
| 691 | GO:0043542 | 0.207450 | Endothelial cell migration (1) | 3.0 | 0.0 |
| 321 | GO:0010507 | 0.205494 | Negative regulation of autophagy (1) | 1.0 | 1.0 |
| 149 | GO:0050853 | 0.205417 | B cell receptor signaling pathway (1) | 1.0 | 0.0 |
| 885 | GO:1900180 | 0.205300 | Regulation of protein localization to nucleus (1) | 1.0 | 1.0 |
| 907 | GO:0051881 | 0.202707 | Regulation of mitochondrial membrane potential (1) | 1.0 | 0.0 |
| 171 | GO:0003014 | 0.201637 | Renal system process (1) | 2.0 | 1.0 |
| 211 | GO:0031507 | 0.200984 | Heterochromatin assembly (1) | 1.0 | 1.0 |
| 64 | GO:0071456 | 0.200564 | Cellular response to hypoxia (1) | 1.0 | 0.0 |
| 694 | GO:0032869 | 0.197755 | Cellular response to insulin stimulus (1) | 2.0 | 0.0 |
| 779 | GO:0008544 | 0.196206 | Epidermis development (1) | 3.0 | 0.0 |
| 634 | GO:0061024 | 0.193065 | Membrane organization (1) | 2.0 | 0.0 |
| 50 | GO:0002040 | 0.192737 | Sprouting angiogenesis (1) | 1.0 | 0.0 |
| 610 | GO:0031333 | 0.191284 | Negative regulation of protein-containing complex assembly (1) | 2.0 | 1.0 |
| 657 | GO:0045165 | 0.191259 | Cell fate commitment (1) | 3.0 | 0.0 |
| 435 | GO:0016055 | 0.190638 | Wnt signaling pathway (1) | 2.0 | 0.0 |
| 569 | GO:0030307 | 0.190101 | Positive regulation of cell growth (1) | 2.0 | 1.0 |
| 200 | GO:0006281 | 0.189187 | Dna repair (1) | 2.0 | 0.0 |
| 63 | GO:0001666 | 0.188575 | Response to hypoxia (1) | 2.0 | 1.0 |
| 560 | GO:0043473 | 0.188137 | Pigmentation (1) | 2.0 | 1.0 |
| 521 | GO:0035051 | 0.187551 | Cardiocyte differentiation (1) | 2.0 | 1.0 |
| 690 | GO:2001020 | 0.187450 | Regulation of response to dna damage stimulus (1) | 2.0 | 0.0 |
| 346 | GO:0006936 | 0.187431 | Muscle contraction (1) | 3.0 | 1.0 |
| 328 | GO:0043065 | 0.184805 | Positive regulation of apoptotic process (1) | 2.0 | 0.0 |
| 650 | GO:0034329 | 0.183554 | Cell junction assembly (1) | 2.0 | 0.0 |
| 262 | GO:0045429 | 0.183059 | Positive regulation of nitric oxide biosynthetic process (1) | 0.0 | 1.0 |
| 117 | GO:0060562 | 0.183024 | Epithelial tube morphogenesis (1) | 2.0 | 0.0 |
| 583 | GO:0032967 | 0.182852 | Positive regulation of collagen biosynthetic process (1) | 0.0 | 1.0 |
| 208 | GO:0006325 | 0.182003 | Chromatin organization (1) | 3.0 | 0.0 |
| 209 | GO:0006338 | 0.181855 | Chromatin remodeling (1) | 2.0 | 1.0 |
| 882 | GO:0032147 | 0.181843 | Activation of protein kinase activity (1) | 1.0 | 0.0 |
| 7 | GO:0000122 | 0.181449 | Negative regulation of transcription by rna polymerase ii (1) | 1.0 | 0.0 |
| 911 | GO:0060416 | 0.180831 | Response to growth hormone (1) | 1.0 | 1.0 |
| 121 | GO:0090050 | 0.180206 | Positive regulation of cell migration involved in sprouting angiogenesis (1) | 0.0 | 1.0 |
| 467 | GO:0009791 | 0.179613 | Post-embryonic development (1) | 1.0 | 0.0 |
| 870 | GO:0070527 | 0.179383 | Platelet aggregation (1) | 1.0 | 1.0 |
| 781 | GO:0008625 | 0.179216 | Extrinsic apoptotic signaling pathway via death domain receptors (1) | 1.0 | 1.0 |
| 702 | GO:0048146 | 0.179210 | Positive regulation of fibroblast proliferation (1) | 0.0 | 1.0 |
| 785 | GO:0009266 | 0.178944 | Response to temperature stimulus (1) | 2.0 | 1.0 |
| 316 | GO:0033157 | 0.178583 | Regulation of intracellular protein transport (1) | 1.0 | 0.0 |
| 472 | GO:0001553 | 0.178541 | Luteinization (1) | 0.0 | 1.0 |
| 174 | GO:0010613 | 0.176998 | Positive regulation of cardiac muscle hypertrophy (1) | 1.0 | 1.0 |
| 842 | GO:0071407 | 0.176595 | Cellular response to organic cyclic compound (1) | 3.0 | 0.0 |
| 834 | GO:0035195 | 0.176389 | Gene silencing by mirna (1) | 1.0 | 0.0 |
| 504 | GO:0048709 | 0.176248 | Oligodendrocyte differentiation (1) | 2.0 | 1.0 |
| 54 | GO:2001214 | 0.175971 | Positive regulation of vasculogenesis (1) | 0.0 | 1.0 |
| 600 | GO:0010632 | 0.174413 | Regulation of epithelial cell migration (1) | 3.0 | 0.0 |
| 682 | GO:0007026 | 0.174040 | Negative regulation of microtubule depolymerization (1) | 0.0 | 1.0 |
| 276 | GO:0016567 | 0.173194 | Protein ubiquitination (1) | 3.0 | 0.0 |
| 881 | GO:0031929 | 0.171840 | Tor signaling (1) | 2.0 | 1.0 |
| 52 | GO:0001541 | 0.171713 | Ovarian follicle development (1) | 1.0 | 0.0 |
| 310 | GO:0051924 | 0.171261 | Regulation of calcium ion transport (1) | 3.0 | 0.0 |
| 899 | GO:0035994 | 0.170901 | Response to muscle stretch (1) | 1.0 | 1.0 |
| 32 | GO:0000422 | 0.170612 | Autophagy of mitochondrion (1) | 2.0 | 1.0 |
| 703 | GO:0048661 | 0.169791 | Positive regulation of smooth muscle cell proliferation (1) | 1.0 | 0.0 |
| 447 | GO:0007173 | 0.169218 | Epidermal growth factor receptor signaling pathway (1) | 2.0 | 0.0 |
sum(probabilities_df["predictions"] ==1)
288
sum(probabilities_df["predictions"] ==0)
651
A probability below 0.5 does not mean that the GO term does not belong to the class; a probability of, for example, 0.2 can still correspond to a 1 (annotated to the MoA).
Modify probabilities
Take into account the annotations each GO term has (general GO terms are easier to predict as they have more annotations)
For drugs with an unknown MoA…
# Re-score every GO term by how far the model's output moves away from that
# term's base rate, expressed as a difference of log-odds (posterior - prior).
# NOTE(review): assumes slim_matrix_single_neuron is a 0/1 pandas DataFrame
# whose rows are in the same order as the rows of probabilities_df -- confirm
# against the cell that builds it.

# Row-wise mean of the matrix: each row's sum divided by the number of columns.
sum_annotations = slim_matrix_single_neuron.T.sum() / slim_matrix_single_neuron.shape[1]

# Log-odds of the base rate and of the predicted probability.
# A rate or probability of exactly 0 or 1 produces -inf/+inf here.
logits_apriori = np.log(sum_annotations / (1 - sum_annotations))
logits_apost = np.log(probabilities_df["probability"] / (1 - probabilities_df["probability"]))

# The subtraction is positional: to_numpy() discards both indexes.
delta_logits = logits_apost.to_numpy() - logits_apriori.to_numpy()

# Re-attach probabilities_df's own index. With the default RangeIndex, an
# index-based merge back onto probabilities_df would pair values with the
# wrong rows (or drop rows) whenever probabilities_df is not indexed 0..n-1
# in row order.
delta_logits_df = pd.DataFrame(
    delta_logits,
    index=probabilities_df.index,
    columns=["delta_logits"],
)
probabilities_mod = probabilities_df.merge(delta_logits_df, left_index=True,right_index=True)
probabilities_mod.loc[probabilities_mod["predictions"] ==1].loc[probabilities_mod["layer_number"] <= 7].sort_values(by=["delta_logits"], ascending=False)
| | GO_term | probability | Name | layer_number | predictions | delta_logits |
|---|---|---|---|---|---|---|
| 839 | GO:0031047 | 0.553888 | Gene silencing by rna (1) | 2.0 | 1.0 | 2.192458 |
| 662 | GO:0031648 | 0.377411 | Protein destabilization (1) | 0.0 | 1.0 | 2.092834 |
| 106 | GO:0071900 | 0.725062 | Regulation of protein serine/threonine kinase activity (1) | 2.0 | 1.0 | 1.944569 |
| 33 | GO:0000423 | 0.229518 | Mitophagy (1) | 1.0 | 1.0 | 1.880007 |
| 538 | GO:0048266 | 0.308838 | Behavioral response to pain (1) | 0.0 | 1.0 | 1.857031 |
| 913 | GO:0090630 | 0.231968 | Activation of gtpase activity (1) | 0.0 | 1.0 | 1.793943 |
| 216 | GO:0045727 | 0.270882 | Positive regulation of translation (1) | 1.0 | 1.0 | 1.746070 |
| 223 | GO:0018108 | 0.783375 | Peptidyl-tyrosine phosphorylation (1) | 3.0 | 1.0 | 1.672826 |
| 719 | GO:0043244 | 0.305739 | Regulation of protein-containing complex disassembly (1) | 2.0 | 1.0 | 1.646098 |
| 241 | GO:0006612 | 0.288796 | Protein targeting to membrane (1) | 1.0 | 1.0 | 1.626841 |
| 104 | GO:0006469 | 0.364467 | Negative regulation of protein kinase activity (1) | 2.0 | 1.0 | 1.593794 |
| 888 | GO:0034976 | 0.540483 | Response to endoplasmic reticulum stress (1) | 3.0 | 1.0 | 1.575980 |
| 890 | GO:0071353 | 0.290912 | Cellular response to interleukin-4 (1) | 1.0 | 1.0 | 1.575256 |
| 638 | GO:0033554 | 0.884331 | Cellular response to stress (1) | 4.0 | 1.0 | 1.573975 |
| 820 | GO:0033993 | 0.809556 | Response to lipid (1) | 3.0 | 1.0 | 1.551573 |
| 74 | GO:0001817 | 0.687600 | Regulation of cytokine production (1) | 3.0 | 1.0 | 1.534715 |
| 682 | GO:0007026 | 0.174040 | Negative regulation of microtubule depolymerization (1) | 0.0 | 1.0 | 1.533781 |
| 528 | GO:0007565 | 0.406022 | Female pregnancy (1) | 2.0 | 1.0 | 1.478881 |
| 224 | GO:0046777 | 0.661031 | Protein autophosphorylation (1) | 1.0 | 1.0 | 1.453821 |
| 544 | GO:0060179 | 0.161090 | Male mating behavior (1) | 0.0 | 1.0 | 1.440901 |
| 32 | GO:0000422 | 0.170612 | Autophagy of mitochondrion (1) | 2.0 | 1.0 | 1.409881 |
| 774 | GO:0030216 | 0.342203 | Keratinocyte differentiation (1) | 2.0 | 1.0 | 1.406530 |
| 44 | GO:0048812 | 0.672129 | Neuron projection morphogenesis (1) | 3.0 | 1.0 | 1.385004 |
| 472 | GO:0001553 | 0.178541 | Luteinization (1) | 0.0 | 1.0 | 1.373326 |
| 77 | GO:0002718 | 0.262195 | Regulation of cytokine production involved in immune response (1) | 2.0 | 1.0 | 1.372827 |
| 900 | GO:0042060 | 0.612497 | Wound healing (1) | 4.0 | 1.0 | 1.325911 |
| 385 | GO:0060632 | 0.143694 | Regulation of microtubule-based movement (1) | 1.0 | 1.0 | 1.306104 |
| 726 | GO:0065003 | 0.654123 | Protein-containing complex assembly (1) | 4.0 | 1.0 | 1.304384 |
| 926 | GO:0060020 | 0.297104 | Bergmann glial cell differentiation (1) | 0.0 | 1.0 | 1.288696 |
| 582 | GO:1902459 | 0.140358 | Positive regulation of stem cell population maintenance (1) | 0.0 | 1.0 | 1.278722 |
| 63 | GO:0001666 | 0.188575 | Response to hypoxia (1) | 2.0 | 1.0 | 1.276928 |
| 342 | GO:2001243 | 0.411574 | Negative regulation of intrinsic apoptotic signaling pathway (1) | 2.0 | 1.0 | 1.262448 |
| 573 | GO:0010629 | 0.742615 | Negative regulation of gene expression (1) | 3.0 | 1.0 | 1.216446 |
| 480 | GO:0048565 | 0.333289 | Digestive tract development (1) | 1.0 | 1.0 | 1.203775 |
| 174 | GO:0010613 | 0.176998 | Positive regulation of cardiac muscle hypertrophy (1) | 1.0 | 1.0 | 1.199403 |
| 676 | GO:0030282 | 0.116755 | Bone mineralization (1) | 1.0 | 1.0 | 1.177409 |
| 49 | GO:0001569 | 0.300115 | Branching involved in blood vessel morphogenesis (1) | 0.0 | 1.0 | 1.170620 |
| 899 | GO:0035994 | 0.170901 | Response to muscle stretch (1) | 1.0 | 1.0 | 1.156962 |
| 443 | GO:0035860 | 0.114067 | Glial cell-derived neurotrophic factor receptor signaling pathway (1) | 0.0 | 1.0 | 1.151083 |
| 847 | GO:0045055 | 0.414665 | Regulated exocytosis (1) | 2.0 | 1.0 | 1.096844 |
| 134 | GO:0002366 | 0.423235 | Leukocyte activation involved in immune response (1) | 3.0 | 1.0 | 1.076788 |
| 181 | GO:0006139 | 0.875555 | Nucleobase-containing compound metabolic process (1) | 6.0 | 1.0 | 1.061941 |
| 689 | GO:1905897 | 0.275961 | Regulation of response to endoplasmic reticulum stress (1) | 2.0 | 1.0 | 1.052784 |
| 38 | GO:0000902 | 0.813929 | Cell morphogenesis (1) | 4.0 | 1.0 | 1.052132 |
| 654 | GO:0090398 | 0.416328 | Cellular senescence (1) | 1.0 | 1.0 | 1.048427 |
| 693 | GO:1904646 | 0.344997 | Cellular response to amyloid-beta (1) | 0.0 | 1.0 | 1.043235 |
| 100 | GO:0033138 | 0.542770 | Positive regulation of peptidyl-serine phosphorylation (1) | 1.0 | 1.0 | 1.018799 |
| 745 | GO:0009653 | 0.877370 | Anatomical structure morphogenesis (1) | 5.0 | 1.0 | 1.014649 |
| 198 | GO:0006260 | 0.275261 | Dna replication (1) | 3.0 | 1.0 | 1.007970 |
| 133 | GO:0043303 | 0.253768 | Mast cell degranulation (1) | 1.0 | 1.0 | 0.981406 |
| 368 | GO:0060271 | 0.126831 | Cilium assembly (1) | 3.0 | 1.0 | 0.970313 |
| 285 | GO:0051051 | 0.421500 | Negative regulation of transport (1) | 3.0 | 1.0 | 0.964316 |
| 626 | GO:0051649 | 0.802013 | Establishment of localization in cell (1) | 4.0 | 1.0 | 0.957089 |
| 378 | GO:0031532 | 0.271109 | Actin cytoskeleton reorganization (1) | 1.0 | 1.0 | 0.947004 |
| 651 | GO:0050808 | 0.322913 | Synapse organization (1) | 3.0 | 1.0 | 0.943923 |
| 11 | GO:0043406 | 0.445845 | Positive regulation of map kinase activity (1) | 1.0 | 1.0 | 0.939978 |
| 34 | GO:1903146 | 0.103490 | Regulation of autophagy of mitochondrion (1) | 1.0 | 1.0 | 0.932003 |
| 262 | GO:0045429 | 0.183059 | Positive regulation of nitric oxide biosynthetic process (1) | 0.0 | 1.0 | 0.911660 |
| 661 | GO:0046326 | 0.165116 | Positive regulation of glucose import (1) | 0.0 | 1.0 | 0.907432 |
| 911 | GO:0060416 | 0.180831 | Response to growth hormone (1) | 1.0 | 1.0 | 0.896690 |
| 571 | GO:2000010 | 0.127151 | Positive regulation of protein localization to cell surface (1) | 0.0 | 1.0 | 0.888565 |
| 633 | GO:0051301 | 0.535293 | Cell division (1) | 2.0 | 1.0 | 0.887200 |
| 483 | GO:0035909 | 0.162013 | Aorta morphogenesis (1) | 1.0 | 1.0 | 0.884752 |
| 54 | GO:2001214 | 0.175971 | Positive regulation of vasculogenesis (1) | 0.0 | 1.0 | 0.863534 |
| 416 | GO:0035556 | 0.887824 | Intracellular signal transduction (1) | 4.0 | 1.0 | 0.862843 |
| 470 | GO:0042733 | 0.097137 | Embryonic digit morphogenesis (1) | 0.0 | 1.0 | 0.861598 |
| 886 | GO:0034502 | 0.132453 | Protein localization to chromosome (1) | 2.0 | 1.0 | 0.856780 |
| 111 | GO:0031069 | 0.234249 | Hair follicle morphogenesis (1) | 0.0 | 1.0 | 0.832897 |
| 880 | GO:0031667 | 0.374380 | Response to nutrient levels (1) | 4.0 | 1.0 | 0.819334 |
| 833 | GO:0010467 | 0.880677 | Gene expression (1) | 5.0 | 1.0 | 0.817357 |
| 184 | GO:0006275 | 0.102018 | Regulation of dna replication (1) | 2.0 | 1.0 | 0.816174 |
| 162 | GO:0060374 | 0.223666 | Mast cell differentiation (1) | 0.0 | 1.0 | 0.815593 |
| 265 | GO:0051247 | 0.818839 | Positive regulation of protein metabolic process (1) | 4.0 | 1.0 | 0.802282 |
| 821 | GO:0034097 | 0.533600 | Response to cytokine (1) | 3.0 | 1.0 | 0.801775 |
| 408 | GO:0007165 | 0.930074 | Signal transduction (1) | 6.0 | 1.0 | 0.801131 |
| 849 | GO:0043966 | 0.142338 | Histone h3 acetylation (1) | 2.0 | 1.0 | 0.797379 |
| 112 | GO:0060789 | 0.091132 | Hair follicle placode formation (1) | 0.0 | 1.0 | 0.791156 |
| 558 | GO:0016032 | 0.500000 | Viral process (1) | 3.0 | 1.0 | 0.785929 |
| 151 | GO:0006959 | 0.225795 | Humoral immune response (1) | 2.0 | 1.0 | 0.785159 |
| 86 | GO:0072006 | 0.358531 | Nephron development (1) | 2.0 | 1.0 | 0.777591 |
| 83 | GO:0002720 | 0.163684 | Positive regulation of cytokine production involved in immune response (1) | 1.0 | 1.0 | 0.776348 |
| 53 | GO:0001570 | 0.230416 | Vasculogenesis (1) | 1.0 | 1.0 | 0.770097 |
| 532 | GO:0007596 | 0.304871 | Blood coagulation (1) | 3.0 | 1.0 | 0.764501 |
| 567 | GO:0051641 | 0.841801 | Cellular localization (1) | 5.0 | 1.0 | 0.761481 |
| 639 | GO:0060326 | 0.341919 | Cell chemotaxis (1) | 2.0 | 1.0 | 0.758938 |
| 19 | GO:0031109 | 0.261030 | Microtubule polymerization or depolymerization (1) | 2.0 | 1.0 | 0.746075 |
| 800 | GO:0030521 | 0.102431 | Androgen receptor signaling pathway (1) | 1.0 | 1.0 | 0.729092 |
| 511 | GO:0030182 | 0.721830 | Neuron differentiation (1) | 5.0 | 1.0 | 0.726501 |
| 806 | GO:0051056 | 0.302828 | Regulation of small gtpase mediated signal transduction (1) | 3.0 | 1.0 | 0.724275 |
| 527 | GO:0007528 | 0.109807 | Neuromuscular junction development (1) | 1.0 | 1.0 | 0.722229 |
| 794 | GO:0043434 | 0.332497 | Response to peptide hormone (1) | 3.0 | 1.0 | 0.716778 |
| 99 | GO:0001934 | 0.658374 | Positive regulation of protein phosphorylation (1) | 3.0 | 1.0 | 0.708241 |
| 896 | GO:0097193 | 0.400210 | Intrinsic apoptotic signaling pathway (1) | 3.0 | 1.0 | 0.705651 |
| 624 | GO:0010628 | 0.702924 | Positive regulation of gene expression (1) | 3.0 | 1.0 | 0.704419 |
| 722 | GO:0042325 | 0.780828 | Regulation of phosphorylation (1) | 5.0 | 1.0 | 0.698909 |
| 752 | GO:0043170 | 0.942545 | Macromolecule metabolic process (1) | 7.0 | 1.0 | 0.693446 |
| 517 | GO:0043586 | 0.209986 | Tongue development (1) | 1.0 | 1.0 | 0.692360 |
| 182 | GO:0016070 | 0.778891 | Rna metabolic process (1) | 5.0 | 1.0 | 0.687621 |
| 281 | GO:0006811 | 0.500000 | Ion transport (1) | 6.0 | 1.0 | 0.686632 |
| 586 | GO:2000773 | 0.121640 | Negative regulation of cellular senescence (1) | 0.0 | 1.0 | 0.685597 |
| 808 | GO:1902532 | 0.449154 | Negative regulation of intracellular signal transduction (1) | 3.0 | 1.0 | 0.684968 |
| 570 | GO:0045597 | 0.628072 | Positive regulation of cell differentiation (1) | 3.0 | 1.0 | 0.680798 |
| 76 | GO:0001819 | 0.388417 | Positive regulation of cytokine production (1) | 2.0 | 1.0 | 0.679733 |
| 934 | GO:0051258 | 0.274092 | Protein polymerization (1) | 3.0 | 1.0 | 0.677814 |
| 713 | GO:0035726 | 0.096936 | Common myeloid progenitor cell proliferation (1) | 0.0 | 1.0 | 0.667842 |
| 386 | GO:0007049 | 0.727375 | Cell cycle (1) | 6.0 | 1.0 | 0.665704 |
| 473 | GO:0008584 | 0.350294 | Male gonad development (1) | 1.0 | 1.0 | 0.663186 |
| 813 | GO:0051898 | 0.103239 | Negative regulation of protein kinase b signaling (1) | 0.0 | 1.0 | 0.653209 |
| 698 | GO:0070663 | 0.318299 | Regulation of leukocyte proliferation (1) | 2.0 | 1.0 | 0.652091 |
| 729 | GO:0120035 | 0.514954 | Regulation of plasma membrane bounded cell projection organization (1) | 3.0 | 1.0 | 0.631426 |
| 881 | GO:0031929 | 0.171840 | Tor signaling (1) | 2.0 | 1.0 | 0.624585 |
| 496 | GO:0048608 | 0.406699 | Reproductive structure development (1) | 2.0 | 1.0 | 0.619202 |
| 559 | GO:0022414 | 0.641754 | Reproductive process (1) | 4.0 | 1.0 | 0.617772 |
| 562 | GO:0050896 | 0.963328 | Response to stimulus (1) | 7.0 | 1.0 | 0.605804 |
| 381 | GO:0008064 | 0.113103 | Regulation of actin polymerization or depolymerization (1) | 2.0 | 1.0 | 0.603158 |
| 855 | GO:1903578 | 0.141298 | Regulation of atp metabolic process (1) | 1.0 | 1.0 | 0.602865 |
| 619 | GO:0050790 | 0.825081 | Regulation of catalytic activity (1) | 4.0 | 1.0 | 0.598054 |
| 476 | GO:0048714 | 0.112165 | Positive regulation of oligodendrocyte differentiation (1) | 0.0 | 1.0 | 0.593772 |
| 363 | GO:0051494 | 0.232679 | Negative regulation of cytoskeleton organization (1) | 2.0 | 1.0 | 0.593448 |
| 652 | GO:0042180 | 0.090402 | Cellular ketone metabolic process (1) | 3.0 | 1.0 | 0.590857 |
| 423 | GO:1902533 | 0.530199 | Positive regulation of intracellular signal transduction (1) | 2.0 | 1.0 | 0.581066 |
| 319 | GO:0006909 | 0.217477 | Phagocytosis (1) | 2.0 | 1.0 | 0.578885 |
| 353 | GO:0006954 | 0.482633 | Inflammatory response (1) | 3.0 | 1.0 | 0.578336 |
| 406 | GO:0048041 | 0.319689 | Focal adhesion assembly (1) | 1.0 | 1.0 | 0.577606 |
| 585 | GO:0048589 | 0.599054 | Developmental growth (1) | 4.0 | 1.0 | 0.575879 |
| 461 | GO:0050804 | 0.367884 | Modulation of chemical synaptic transmission (1) | 3.0 | 1.0 | 0.568933 |
| 711 | GO:0019752 | 0.316625 | Carboxylic acid metabolic process (1) | 4.0 | 1.0 | 0.563481 |
| 629 | GO:0051174 | 0.783163 | Regulation of phosphorus metabolic process (1) | 6.0 | 1.0 | 0.558257 |
| 82 | GO:0032743 | 0.079738 | Positive regulation of interleukin-2 production (1) | 0.0 | 1.0 | 0.545266 |
| 510 | GO:0042063 | 0.408279 | Gliogenesis (1) | 3.0 | 1.0 | 0.539129 |
| 730 | GO:0031175 | 0.631148 | Neuron projection development (1) | 4.0 | 1.0 | 0.537147 |
| 741 | GO:0016477 | 0.682158 | Cell migration (1) | 4.0 | 1.0 | 0.536651 |
| 663 | GO:0050821 | 0.165950 | Protein stabilization (1) | 0.0 | 1.0 | 0.535214 |
| 343 | GO:1902166 | 0.145254 | Negative regulation of intrinsic apoptotic signaling pathway in response to dna damage by p53 class mediator (1) | 0.0 | 1.0 | 0.525494 |
| 213 | GO:0006396 | 0.268052 | Rna processing (1) | 4.0 | 1.0 | 0.523646 |
| 563 | GO:1900272 | 0.063979 | Negative regulation of long-term synaptic potentiation (1) | 0.0 | 1.0 | 0.517858 |
| 36 | GO:0000723 | 0.137533 | Telomere maintenance (1) | 1.0 | 1.0 | 0.515446 |
| 524 | GO:0007519 | 0.230417 | Skeletal muscle tissue development (1) | 2.0 | 1.0 | 0.511692 |
| 93 | GO:0001843 | 0.082584 | Neural tube closure (1) | 1.0 | 1.0 | 0.491845 |
| 226 | GO:0035304 | 0.232643 | Regulation of protein dephosphorylation (1) | 2.0 | 1.0 | 0.490892 |
| 684 | GO:0051770 | 0.158964 | Positive regulation of nitric-oxide synthase biosynthetic process (1) | 0.0 | 1.0 | 0.483868 |
| 583 | GO:0032967 | 0.182852 | Positive regulation of collagen biosynthetic process (1) | 0.0 | 1.0 | 0.478922 |
| 641 | GO:0071417 | 0.494543 | Cellular response to organonitrogen compound (1) | 3.0 | 1.0 | 0.475108 |
| 788 | GO:0009410 | 0.407937 | Response to xenobiotic stimulus (1) | 2.0 | 1.0 | 0.474799 |
| 412 | GO:0009966 | 0.816584 | Regulation of signal transduction (1) | 5.0 | 1.0 | 0.474348 |
| 735 | GO:0046718 | 0.151222 | Viral entry into host cell (1) | 1.0 | 1.0 | 0.472175 |
| 266 | GO:0030163 | 0.372430 | Protein catabolic process (1) | 4.0 | 1.0 | 0.453053 |
| 196 | GO:0006357 | 0.230696 | Regulation of transcription by rna polymerase ii (1) | 3.0 | 1.0 | 0.447389 |
| 840 | GO:0043154 | 0.084113 | Negative regulation of cysteine-type endopeptidase activity involved in apoptotic process (1) | 1.0 | 1.0 | 0.427212 |
| 299 | GO:0030705 | 0.133167 | Cytoskeleton-dependent intracellular transport (1) | 3.0 | 1.0 | 0.424567 |
| 433 | GO:0097191 | 0.302625 | Extrinsic apoptotic signaling pathway (1) | 3.0 | 1.0 | 0.420731 |
| 610 | GO:0031333 | 0.191284 | Negative regulation of protein-containing complex assembly (1) | 2.0 | 1.0 | 0.417629 |
| 501 | GO:0007507 | 0.480118 | Heart development (1) | 4.0 | 1.0 | 0.417365 |
| 901 | GO:0042110 | 0.432222 | T cell activation (1) | 4.0 | 1.0 | 0.413843 |
| 260 | GO:0006807 | 0.931522 | Nitrogen compound metabolic process (1) | 7.0 | 1.0 | 0.413085 |
| 785 | GO:0009266 | 0.178944 | Response to temperature stimulus (1) | 2.0 | 1.0 | 0.412491 |
| 358 | GO:0007005 | 0.427023 | Mitochondrion organization (1) | 3.0 | 1.0 | 0.412213 |
| 377 | GO:0031032 | 0.257340 | Actomyosin structure organization (1) | 2.0 | 1.0 | 0.410068 |
| 484 | GO:0007399 | 0.731066 | Nervous system development (1) | 6.0 | 1.0 | 0.409544 |
| 136 | GO:0002376 | 0.760395 | Immune system process (1) | 6.0 | 1.0 | 0.409057 |
| 211 | GO:0031507 | 0.200984 | Heterochromatin assembly (1) | 1.0 | 1.0 | 0.406540 |
| 20 | GO:0070507 | 0.234661 | Regulation of microtubule cytoskeleton organization (1) | 2.0 | 1.0 | 0.406533 |
| 356 | GO:0006996 | 0.787638 | Organelle organization (1) | 5.0 | 1.0 | 0.400534 |
| 376 | GO:0007015 | 0.344878 | Actin filament organization (1) | 3.0 | 1.0 | 0.399824 |
| 173 | GO:0003300 | 0.147997 | Cardiac muscle hypertrophy (1) | 2.0 | 1.0 | 0.399424 |
| 620 | GO:0051098 | 0.363823 | Regulation of binding (1) | 3.0 | 1.0 | 0.394294 |
| 922 | GO:0036324 | 0.117627 | Vascular endothelial growth factor receptor-2 signaling pathway (1) | 0.0 | 1.0 | 0.392320 |
| 487 | GO:0030325 | 0.087339 | Adrenal gland development (1) | 0.0 | 1.0 | 0.389651 |
| 781 | GO:0008625 | 0.179216 | Extrinsic apoptotic signaling pathway via death domain receptors (1) | 1.0 | 1.0 | 0.375455 |
| 608 | GO:0043114 | 0.067682 | Regulation of vascular permeability (1) | 1.0 | 1.0 | 0.368329 |
| 103 | GO:0042531 | 0.243583 | Positive regulation of tyrosine phosphorylation of stat protein (1) | 0.0 | 1.0 | 0.365635 |
| 102 | GO:0050731 | 0.126350 | Positive regulation of peptidyl-tyrosine phosphorylation (1) | 2.0 | 1.0 | 0.364186 |
| 504 | GO:0048709 | 0.176248 | Oligodendrocyte differentiation (1) | 2.0 | 1.0 | 0.355145 |
| 324 | GO:0016241 | 0.118988 | Regulation of macroautophagy (1) | 2.0 | 1.0 | 0.349328 |
| 218 | GO:0006468 | 0.771378 | Protein phosphorylation (1) | 5.0 | 1.0 | 0.348022 |
| 171 | GO:0003014 | 0.201637 | Renal system process (1) | 2.0 | 1.0 | 0.341557 |
| 644 | GO:0071230 | 0.284292 | Cellular response to amino acid stimulus (1) | 1.0 | 1.0 | 0.332289 |
| 321 | GO:0010507 | 0.205494 | Negative regulation of autophagy (1) | 1.0 | 1.0 | 0.332037 |
| 533 | GO:0030168 | 0.219923 | Platelet activation (1) | 2.0 | 1.0 | 0.322598 |
| 546 | GO:0008104 | 0.599923 | Protein localization (1) | 5.0 | 1.0 | 0.318133 |
| 362 | GO:0033043 | 0.644550 | Regulation of organelle organization (1) | 4.0 | 1.0 | 0.315090 |
| 851 | GO:0070933 | 0.058559 | Histone h4 deacetylation (1) | 0.0 | 1.0 | 0.313671 |
| 460 | GO:0023061 | 0.159325 | Signal release (1) | 4.0 | 1.0 | 0.312808 |
| 438 | GO:0007179 | 0.169016 | Transforming growth factor beta receptor signaling pathway (1) | 1.0 | 1.0 | 0.304505 |
| 300 | GO:0032386 | 0.384834 | Regulation of intracellular transport (1) | 2.0 | 1.0 | 0.296704 |
| 920 | GO:0036092 | 0.063107 | Phosphatidylinositol-3-phosphate biosynthetic process (1) | 0.0 | 1.0 | 0.293434 |
| 505 | GO:0007283 | 0.290337 | Spermatogenesis (1) | 2.0 | 1.0 | 0.287750 |
| 357 | GO:0006997 | 0.062583 | Nucleus organization (1) | 2.0 | 1.0 | 0.284535 |
| 748 | GO:0009056 | 0.627534 | Catabolic process (1) | 5.0 | 1.0 | 0.276960 |
| 48 | GO:0001525 | 0.386728 | Angiogenesis (1) | 2.0 | 1.0 | 0.264850 |
| 541 | GO:0008542 | 0.131766 | Visual learning (1) | 0.0 | 1.0 | 0.264386 |
| 98 | GO:0001932 | 0.669289 | Regulation of protein phosphorylation (1) | 4.0 | 1.0 | 0.263137 |
| 734 | GO:0051702 | 0.152779 | Biological process involved in interaction with symbiont (1) | 2.0 | 1.0 | 0.263093 |
| 243 | GO:0006606 | 0.250144 | Protein import into nucleus (1) | 1.0 | 1.0 | 0.261493 |
| 513 | GO:0030900 | 0.289030 | Forebrain development (1) | 3.0 | 1.0 | 0.257354 |
| 673 | GO:0043392 | 0.109472 | Negative regulation of dna binding (1) | 1.0 | 1.0 | 0.255228 |
| 346 | GO:0006936 | 0.187431 | Muscle contraction (1) | 3.0 | 1.0 | 0.250863 |
| 552 | GO:0033365 | 0.343027 | Protein localization to organelle (1) | 3.0 | 1.0 | 0.239223 |
| 9 | GO:0043408 | 0.476159 | Regulation of mapk cascade (1) | 2.0 | 1.0 | 0.238056 |
| 887 | GO:0034504 | 0.294393 | Protein localization to nucleus (1) | 2.0 | 1.0 | 0.236099 |
| 598 | GO:0043254 | 0.240098 | Regulation of protein-containing complex assembly (1) | 3.0 | 1.0 | 0.234151 |
| 280 | GO:0006810 | 0.773180 | Transport (1) | 7.0 | 1.0 | 0.229528 |
| 267 | GO:0045732 | 0.069485 | Positive regulation of protein catabolic process (1) | 2.0 | 1.0 | 0.220326 |
| 668 | GO:0010976 | 0.084952 | Positive regulation of neuron projection development (1) | 1.0 | 1.0 | 0.216502 |
| 212 | GO:0051090 | 0.131395 | Regulation of dna-binding transcription factor activity (1) | 2.0 | 1.0 | 0.215454 |
| 799 | GO:0009743 | 0.121060 | Response to carbohydrate (1) | 2.0 | 1.0 | 0.214795 |
| 675 | GO:0071277 | 0.074147 | Cellular response to calcium ion (1) | 0.0 | 1.0 | 0.211553 |
| 495 | GO:0060976 | 0.094845 | Coronary vasculature development (1) | 1.0 | 1.0 | 0.210353 |
| 803 | GO:0042475 | 0.125550 | Odontogenesis of dentin-containing tooth (1) | 2.0 | 1.0 | 0.208932 |
| 671 | GO:0032092 | 0.120001 | Positive regulation of protein binding (1) | 1.0 | 1.0 | 0.204806 |
| 772 | GO:0060485 | 0.320964 | Mesenchyme development (1) | 3.0 | 1.0 | 0.203758 |
| 121 | GO:0090050 | 0.180206 | Positive regulation of cell migration involved in sprouting angiogenesis (1) | 0.0 | 1.0 | 0.202697 |
| 801 | GO:0033143 | 0.078487 | Regulation of intracellular steroid hormone receptor signaling pathway (1) | 1.0 | 1.0 | 0.199503 |
| 870 | GO:0070527 | 0.179383 | Platelet aggregation (1) | 1.0 | 1.0 | 0.197119 |
| 314 | GO:0070588 | 0.238228 | Calcium ion transmembrane transport (1) | 4.0 | 1.0 | 0.196919 |
| 656 | GO:0030154 | 0.829241 | Cell differentiation (1) | 6.0 | 1.0 | 0.193962 |
| 645 | GO:0071300 | 0.128940 | Cellular response to retinoic acid (1) | 0.0 | 1.0 | 0.193772 |
| 897 | GO:0035767 | 0.138830 | Endothelial cell chemotaxis (1) | 1.0 | 1.0 | 0.192330 |
| 332 | GO:0097190 | 0.410188 | Apoptotic signaling pathway (1) | 4.0 | 1.0 | 0.189603 |
| 643 | GO:0071222 | 0.143456 | Cellular response to lipopolysaccharide (1) | 2.0 | 1.0 | 0.189188 |
| 898 | GO:0035924 | 0.211788 | Cellular response to vascular endothelial growth factor stimulus (1) | 2.0 | 1.0 | 0.184593 |
| 459 | GO:0007267 | 0.506882 | Cell-cell signaling (1) | 5.0 | 1.0 | 0.184371 |
| 323 | GO:0016236 | 0.245469 | Macroautophagy (1) | 3.0 | 1.0 | 0.183750 |
| 317 | GO:0006897 | 0.277768 | Endocytosis (1) | 3.0 | 1.0 | 0.178141 |
| 244 | GO:0042307 | 0.225164 | Positive regulation of protein import into nucleus (1) | 0.0 | 1.0 | 0.177873 |
| 728 | GO:0030032 | 0.140303 | Lamellipodium assembly (1) | 1.0 | 1.0 | 0.163285 |
| 640 | GO:0071310 | 0.661841 | Cellular response to organic substance (1) | 4.0 | 1.0 | 0.156044 |
| 686 | GO:0097009 | 0.055448 | Energy homeostasis (1) | 0.0 | 1.0 | 0.155920 |
| 692 | GO:0010595 | 0.235403 | Positive regulation of endothelial cell migration (1) | 2.0 | 1.0 | 0.154755 |
| 309 | GO:0034765 | 0.277608 | Regulation of ion transmembrane transport (1) | 4.0 | 1.0 | 0.153881 |
| 770 | GO:0009887 | 0.516411 | Animal organ morphogenesis (1) | 4.0 | 1.0 | 0.152678 |
| 209 | GO:0006338 | 0.181855 | Chromatin remodeling (1) | 2.0 | 1.0 | 0.147943 |
| 547 | GO:0032880 | 0.479479 | Regulation of protein localization (1) | 4.0 | 1.0 | 0.144928 |
| 918 | GO:0046854 | 0.054745 | Phosphatidylinositol phosphate biosynthetic process (1) | 1.0 | 1.0 | 0.142413 |
| 329 | GO:0043066 | 0.610305 | Negative regulation of apoptotic process (1) | 4.0 | 1.0 | 0.132954 |
| 824 | GO:0071363 | 0.407544 | Cellular response to growth factor stimulus (1) | 3.0 | 1.0 | 0.122808 |
| 603 | GO:0061045 | 0.068278 | Negative regulation of wound healing (1) | 2.0 | 1.0 | 0.122775 |
| 225 | GO:0006470 | 0.305848 | Protein dephosphorylation (1) | 3.0 | 1.0 | 0.111956 |
| 388 | GO:0051726 | 0.483775 | Regulation of cell cycle (1) | 5.0 | 1.0 | 0.109432 |
| 516 | GO:0007423 | 0.372363 | Sensory organ development (1) | 3.0 | 1.0 | 0.106514 |
| 261 | GO:0051171 | 0.836045 | Regulation of nitrogen compound metabolic process (1) | 6.0 | 1.0 | 0.100916 |
| 375 | GO:0051496 | 0.076120 | Positive regulation of stress fiber assembly (1) | 0.0 | 1.0 | 0.097110 |
| 758 | GO:0031099 | 0.247922 | Regeneration (1) | 2.0 | 1.0 | 0.096133 |
| 55 | GO:0001649 | 0.136472 | Osteoblast differentiation (1) | 1.0 | 1.0 | 0.091104 |
| 227 | GO:0032516 | 0.155024 | Positive regulation of phosphoprotein phosphatase activity (1) | 0.0 | 1.0 | 0.090968 |
| 191 | GO:0045944 | 0.420524 | Positive regulation of transcription by rna polymerase ii (1) | 2.0 | 1.0 | 0.084844 |
| 283 | GO:0051049 | 0.611523 | Regulation of transport (1) | 5.0 | 1.0 | 0.084359 |
| 739 | GO:1902903 | 0.245529 | Regulation of supramolecular fiber organization (1) | 3.0 | 1.0 | 0.083258 |
| 569 | GO:0030307 | 0.190101 | Positive regulation of cell growth (1) | 2.0 | 1.0 | 0.078819 |
| 679 | GO:0042310 | 0.060677 | Vasoconstriction (1) | 1.0 | 1.0 | 0.075349 |
| 405 | GO:0007159 | 0.162103 | Leukocyte cell-cell adhesion (1) | 3.0 | 1.0 | 0.074992 |
| 566 | GO:0032879 | 0.660318 | Regulation of localization (1) | 6.0 | 1.0 | 0.074219 |
| 568 | GO:0008284 | 0.513916 | Positive regulation of cell population proliferation (1) | 2.0 | 1.0 | 0.073069 |
| 561 | GO:0048511 | 0.251652 | Rhythmic process (1) | 3.0 | 1.0 | 0.067632 |
| 702 | GO:0048146 | 0.179210 | Positive regulation of fibroblast proliferation (1) | 0.0 | 1.0 | 0.067004 |
| 403 | GO:0033628 | 0.059919 | Regulation of cell adhesion mediated by integrin (1) | 1.0 | 1.0 | 0.061975 |
| 885 | GO:1900180 | 0.205300 | Regulation of protein localization to nucleus (1) | 1.0 | 1.0 | 0.060202 |
| 228 | GO:0006508 | 0.347532 | Proteolysis (1) | 4.0 | 1.0 | 0.056727 |
| 560 | GO:0043473 | 0.188137 | Pigmentation (1) | 2.0 | 1.0 | 0.036612 |
| 777 | GO:0050680 | 0.103322 | Negative regulation of epithelial cell proliferation (1) | 2.0 | 1.0 | 0.036383 |
| 751 | GO:0046034 | 0.094236 | Atp metabolic process (1) | 2.0 | 1.0 | 0.034832 |
| 591 | GO:0010941 | 0.705559 | Regulation of cell death (1) | 5.0 | 1.0 | 0.026616 |
| 818 | GO:0010243 | 0.557617 | Response to organonitrogen compound (1) | 4.0 | 1.0 | 0.022040 |
| 724 | GO:0036473 | 0.154959 | Cell death in response to oxidative stress (1) | 2.0 | 1.0 | 0.021429 |
| 153 | GO:0050776 | 0.424435 | Regulation of immune response (1) | 4.0 | 1.0 | 0.011047 |
| 521 | GO:0035051 | 0.187551 | Cardiocyte differentiation (1) | 2.0 | 1.0 | 0.003905 |
| 194 | GO:0006355 | 0.544373 | Regulation of transcription, dna-templated (1) | 4.0 | 1.0 | 0.003607 |
| 622 | GO:0043086 | 0.212387 | Negative regulation of catalytic activity (1) | 3.0 | 1.0 | -0.003920 |
| 494 | GO:0060840 | 0.094489 | Artery development (1) | 2.0 | 1.0 | -0.013518 |
| 936 | GO:0051000 | 0.059868 | Positive regulation of nitric-oxide synthase activity (1) | 0.0 | 1.0 | -0.017649 |
| 382 | GO:0030041 | 0.093571 | Actin filament polymerization (1) | 2.0 | 1.0 | -0.024295 |
| 712 | GO:0033002 | 0.280632 | Muscle cell proliferation (1) | 2.0 | 1.0 | -0.031117 |
| 750 | GO:0044281 | 0.305872 | Small molecule metabolic process (1) | 5.0 | 1.0 | -0.053702 |
| 257 | GO:0046488 | 0.135113 | Phosphatidylinositol metabolic process (1) | 2.0 | 1.0 | -0.069795 |
| 731 | GO:0031529 | 0.036028 | Ruffle organization (1) | 1.0 | 1.0 | -0.085835 |
| 469 | GO:0060173 | 0.072132 | Limb development (1) | 1.0 | 1.0 | -0.088184 |
| 296 | GO:0015031 | 0.292221 | Protein transport (1) | 4.0 | 1.0 | -0.098692 |
| 927 | GO:0042632 | 0.034870 | Cholesterol homeostasis (1) | 0.0 | 1.0 | -0.119705 |
| 78 | GO:0032760 | 0.100402 | Positive regulation of tumor necrosis factor production (1) | 0.0 | 1.0 | -0.132748 |
| 856 | GO:0019722 | 0.034146 | Calcium-mediated signaling (1) | 2.0 | 1.0 | -0.141431 |
| 288 | GO:0032940 | 0.217398 | Secretion by cell (1) | 5.0 | 1.0 | -0.147194 |
| 705 | GO:0051353 | 0.082787 | Positive regulation of oxidoreductase activity (1) | 1.0 | 1.0 | -0.158575 |
| 176 | GO:0044262 | 0.129234 | Cellular carbohydrate metabolic process (1) | 3.0 | 1.0 | -0.190099 |
| 180 | GO:0019318 | 0.046251 | Hexose metabolic process (1) | 2.0 | 1.0 | -0.211363 |
| 440 | GO:0030512 | 0.045444 | Negative regulation of transforming growth factor beta receptor signaling pathway (1) | 0.0 | 1.0 | -0.229806 |
| 331 | GO:0071887 | 0.072964 | Leukocyte apoptotic process (1) | 2.0 | 1.0 | -0.295526 |
| 823 | GO:0045471 | 0.068117 | Response to ethanol (1) | 1.0 | 1.0 | -0.639920 |
| 232 | GO:0010951 | 0.025246 | Negative regulation of endopeptidase activity (1) | 2.0 | 1.0 | -0.917276 |
names2 = list(probabilities_mod.loc[probabilities_mod["predictions"] ==1].loc[probabilities_mod["layer_number"] <=7].sort_values(by=["delta_logits"], ascending=False)["Name"].head(30))
terms2 = list(probabilities_mod.loc[probabilities_mod["predictions"] ==1].loc[probabilities_mod["layer_number"] <=7].sort_values(by=["delta_logits"], ascending=False)["GO_term"].head(30))
logits2 = list(probabilities_mod.loc[probabilities_mod["predictions"] ==1].loc[probabilities_mod["layer_number"] <=7].sort_values(by=["delta_logits"], ascending=False)["delta_logits"].head(30))
names2 = [x[:-4] for x in names2] for i in range(0,len(names2)):
print(terms2[i],names2[i],logits2[i])GO:0031047 Gene silencing by rna 2.192457619336144
GO:0031648 Protein destabilization 2.092833916210919
GO:0071900 Regulation of protein serine/threonine kinase activity 1.944569179670069
GO:0000423 Mitophagy 1.8800065969407627
GO:0048266 Behavioral response to pain 1.8570307139212263
GO:0090630 Activation of gtpase activity 1.793943480641404
GO:0045727 Positive regulation of translation 1.7460704310285706
GO:0018108 Peptidyl-tyrosine phosphorylation 1.6728257190053135
GO:0043244 Regulation of protein-containing complex disassembly 1.6460980615310405
GO:0006612 Protein targeting to membrane 1.6268405228374492
GO:0006469 Negative regulation of protein kinase activity 1.5937943848967007
GO:0034976 Response to endoplasmic reticulum stress 1.575980420814227
GO:0071353 Cellular response to interleukin-4 1.5752555355925209
GO:0033554 Cellular response to stress 1.573975479674923
GO:0033993 Response to lipid 1.5515734212063519
GO:0001817 Regulation of cytokine production 1.5347154822720621
GO:0007026 Negative regulation of microtubule depolymerization 1.5337813437727923
GO:0007565 Female pregnancy 1.4788810435976616
GO:0046777 Protein autophosphorylation 1.453821209026271
GO:0060179 Male mating behavior 1.4409006464912832
GO:0000422 Autophagy of mitochondrion 1.4098807478143418
GO:0030216 Keratinocyte differentiation 1.406529668352229
GO:0048812 Neuron projection morphogenesis 1.3850039160074457
GO:0001553 Luteinization 1.3733255324441331
GO:0002718 Regulation of cytokine production involved in immune response 1.3728274508963876
GO:0042060 Wound healing 1.325910525701464
GO:0060632 Regulation of microtubule-based movement 1.3061039503079361
GO:0065003 Protein-containing complex assembly 1.304383769510548
GO:0060020 Bergmann glial cell differentiation 1.2886961413918279
GO:1902459 Positive regulation of stem cell population maintenance 1.2787216247479065
# import libraries
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# set font
plt.rcParams['font.family'] = 'sans-serif'
plt.rcParams['font.sans-serif'] = 'Roboto'
# set the style of the axes and the text color
plt.rcParams['axes.edgecolor']='#333F4B'
plt.rcParams['axes.linewidth']=0.8
plt.rcParams['xtick.color']='#333F4B'
plt.rcParams['ytick.color']='#333F4B'
plt.rcParams['text.color']='#333F4B'
# create some fake data
percentages = pd.Series(logits2,
index=names2)
df = pd.DataFrame({'percentage' : percentages})
df = df.sort_values(by='percentage')
# we first need a numeric placeholder for the y axis
my_range=list(range(1,len(df.index)+1))
fig, ax = plt.subplots(figsize=(4,17))
# create for each expense type an horizontal line that starts at x = 0 with the length
# represented by the specific expense percentage value.
plt.hlines(y=my_range, xmin=0, xmax=df['percentage'], color='#208EA3', alpha=0.2, linewidth=14)
# create for each expense type a dot at the level of the expense percentage value
plt.plot(df['percentage'], my_range, "o", markersize=14, color='#208EA3', alpha=0.8)
# set labels
ax.set_xlabel(' Δlogit', fontsize=25, fontweight='black', color = '#36382E')
ax.set_ylabel('')
ax.set_facecolor(color="white")
ax.set_alpha(1)
# set axis
ax.tick_params(axis='both', which='major', labelsize=30)
plt.yticks(my_range, df.index)
# add an horizonal label for the y axis
fig.text(-0.58, 0.862, 'MoA (GO terms)', fontsize=27, fontweight='black', color = '#36382E')
fig.text(0.2, 0.9, selected_drug_u_name.capitalize(), fontsize=30, fontweight='black', color = '#36382E')
# change the style of the axis spines
ax.spines['top'].set_visible(False)
ax.spines['right'].set_visible(False)
ax.spines['left'].set_bounds((1, len(my_range)))
ax.set_xlim(0,max(logits2)+0.1)
ax.spines['left'].set_position(('outward', 8))
ax.spines['bottom'].set_position(('outward', 5))
plt.savefig(resultsdir+selected_drug_u_name+'_top_terms.png', dpi=300, bbox_inches='tight')
For a known drug…
display(combobox)
selected_drug_name = combobox.result
# THE TEST LOGITS!!
# Column of platt_matrix for the selected drug, one row per GO term.
train_drug_logs = pd.DataFrame(platt_matrix.loc[:, selected_drug_name]).reset_index()
train_drug_logs.columns = ["GO_term", "probability"]
# Attach the GO term metadata (the tables below show Name and layer_number).
train_drug_logs = train_drug_logs.merge(real_go_info_svm, on="GO_term")
# 30 most probable GO terms with layer_number <= 3 (computed once, used twice).
top30_known = (
    train_drug_logs.loc[train_drug_logs["layer_number"] <= 3]
    .sort_values(by=["probability"], ascending=False)
    .head(30)
)
top30_known
# For known drugs: number of those 30 terms that also appear in column 1 of
# compounds_GOterms_matches[selected_drug_name].
# (presumably the drug's known GO annotations; confirm against where it is built)
len(
    set(top30_known["GO_term"]).intersection(
        set(pd.DataFrame(compounds_GOterms_matches[selected_drug_name])[1])
    )
)
30
# Probability grouped by the drug's value in slim_matrix_single_neuron.
# NOTE(review): `plot` is not defined in the visible code; x and y are passed
# as vectors, so `data=plot` is probably unnecessary. Confirm before removing.
ax = sns.boxplot(
    x=slim_matrix_single_neuron.loc[train_drug_logs["GO_term"], selected_drug_name],
    y=train_drug_logs.set_index("GO_term")["probability"],
    data=plot,
    showfliers=True,
)
# same as before
sum_annotations = slim_matrix_single_neuron.T.sum()/slim_matrix_single_neuron.shape[1]
logits_apriori= np.log(sum_annotations/(1-sum_annotations))logits_apost= np.log(train_drug_logs["probability"]/(1-train_drug_logs["probability"]))
delta_logits = logits_apost.to_numpy()-logits_apriori.to_numpy()
delta_logits_df = pd.DataFrame(delta_logits)
delta_logits_df.columns = ["delta_logits"]
train_drug_mod = train_drug_logs.merge(delta_logits_df, left_index=True,right_index=True)train_drug_mod.loc[train_drug_mod["layer_number"] <=3].sort_values(by=["delta_logits"], ascending=False).head(30)| GO_term | probability | Name | layer_number | delta_logits | |
|---|---|---|---|---|---|
| 578 | GO:2000379 | 0.603184 | Positive regulation of reactive oxygen species metabolic process (1) | 1.0 | 2.770126 |
| 253 | GO:0043552 | 0.735601 | Positive regulation of phosphatidylinositol 3-kinase activity (1) | 0.0 | 2.707570 |
| 80 | GO:0010575 | 0.438910 | Positive regulation of vascular endothelial growth factor production (1) | 0.0 | 2.282492 |
| 633 | GO:0051301 | 0.817491 | Cell division (1) | 2.0 | 2.245231 |
| 224 | GO:0046777 | 0.780375 | Protein autophosphorylation (1) | 1.0 | 2.053782 |
| 423 | GO:1902533 | 0.825532 | Positive regulation of intracellular signal transduction (1) | 2.0 | 2.014408 |
| 458 | GO:0035025 | 0.374269 | Positive regulation of rho protein signal transduction (1) | 0.0 | 1.952270 |
| 848 | GO:0071670 | 0.345148 | Smooth muscle cell chemotaxis (1) | 0.0 | 1.887644 |
| 348 | GO:0006939 | 0.412178 | Smooth muscle contraction (1) | 2.0 | 1.842256 |
| 350 | GO:0045987 | 0.348868 | Positive regulation of smooth muscle contraction (1) | 1.0 | 1.783402 |
| 908 | GO:0051899 | 0.591286 | Membrane depolarization (1) | 2.0 | 1.675960 |
| 653 | GO:0072593 | 0.631785 | Reactive oxygen species metabolic process (1) | 3.0 | 1.650121 |
| 115 | GO:0060312 | 0.275354 | Regulation of blood vessel remodeling (1) | 0.0 | 1.625762 |
| 926 | GO:0060020 | 0.369360 | Bergmann glial cell differentiation (1) | 0.0 | 1.614859 |
| 923 | GO:0048170 | 0.295742 | Positive regulation of long-term neuronal synaptic plasticity (1) | 0.0 | 1.598558 |
| 99 | GO:0001934 | 0.823688 | Positive regulation of protein phosphorylation (1) | 3.0 | 1.593723 |
| 853 | GO:0038083 | 0.500000 | Peptidyl-tyrosine autophosphorylation (1) | 0.0 | 1.588712 |
| 713 | GO:0035726 | 0.210551 | Common myeloid progenitor cell proliferation (1) | 0.0 | 1.577983 |
| 445 | GO:0048008 | 0.414654 | Platelet-derived growth factor receptor signaling pathway (1) | 1.0 | 1.552362 |
| 857 | GO:0035584 | 0.298388 | Calcium-mediated signaling using intracellular calcium source (1) | 0.0 | 1.496390 |
| 333 | GO:1904019 | 0.457451 | Epithelial cell apoptotic process (1) | 1.0 | 1.481165 |
| 933 | GO:0051150 | 0.337253 | Regulation of smooth muscle cell differentiation (1) | 1.0 | 1.474264 |
| 352 | GO:0014827 | 0.254529 | Intestine smooth muscle contraction (1) | 0.0 | 1.453480 |
| 814 | GO:0090037 | 0.235572 | Positive regulation of protein kinase c signaling (1) | 0.0 | 1.416277 |
| 894 | GO:0048017 | 0.609803 | Inositol lipid-mediated signaling (1) | 1.0 | 1.399588 |
| 506 | GO:0007286 | 0.240113 | Spermatid development (1) | 1.0 | 1.376021 |
| 742 | GO:0035733 | 0.239304 | Hepatic stellate cell activation (1) | 0.0 | 1.371582 |
| 10 | GO:0051403 | 0.429898 | Stress-activated mapk cascade (1) | 2.0 | 1.369506 |
| 702 | GO:0048146 | 0.440487 | Positive regulation of fibroblast proliferation (1) | 0.0 | 1.349525 |
| 782 | GO:1902042 | 0.289337 | Negative regulation of extrinsic apoptotic signaling pathway via death domain receptors (1) | 0.0 | 1.347889 |
# delta_logits grouped by the selected drug's value in slim_matrix_single_neuron.
# NOTE(review): `plot` is not defined in the visible code; x and y are passed
# as vectors, so `data=plot` is probably unnecessary. Confirm before removing.
ax = sns.boxplot(
    x=slim_matrix_single_neuron.loc[train_drug_mod["GO_term"], selected_drug_name],
    y=train_drug_mod.set_index("GO_term")["delta_logits"],
    data=plot,
    showfliers=True,
)
SVM GO TERM 2D representation
from sklearn.manifold import TSNE
import plotly.express as px
Choose GO term to study…
display(combobox_go)selected_goterm = combobox_go.resultreal_go_info[real_go_info["GO_term"]==selected_goterm+"_1"]| GO_term | Name | layer_number | |
|---|---|---|---|
| 4338 | GO:0071353_1 | Cellular response to interleukin-4 (1) | 1.0 |
list_nodes = []
for i in range(1,7):
list_nodes.append(selected_goterm+"_"+str(i))
score = attribution_data_annotated.loc[list_nodes].T
score_mod = score.divide(score.std()).fillna(0)
annotations =slim_matrix_single_neuron.loc[selected_goterm,]
y_predicted = models_svm[selected_goterm].predict(score_mod.astype(float))Plot SVM
View statistics of GOterm
“Perfect” model (with train data)
auc = metrics.roc_auc_score(annotations, models_svm[selected_goterm].decision_function(score_mod.astype(float)))
cnf_matrix = metrics.confusion_matrix(annotations,y_predicted)
print(cnf_matrix)
print("Accuracy:",metrics.accuracy_score(annotations, y_predicted))
print("Precision:",metrics.precision_score(annotations,y_predicted)) # TP / (TP+FP)
print("Recall:",metrics.recall_score(annotations, y_predicted)) #TP / (TP+FN)
print("AUC with score:",auc) [[206 6]
[ 2 16]]
Accuracy: 0.9652173913043478
Precision: 0.7272727272727273
Recall: 0.8888888888888888
AUC with score: 0.9855870020964361
TN - FP
FN - TP
In my opinion, precision matters most here: I would rather have fewer FP, right?
Test statistics…
auc = metrics.roc_auc_score(slim_matrix_single_neuron.loc[selected_goterm], platt_matrix.loc[selected_goterm])
cnf_matrix = metrics.confusion_matrix(slim_matrix_single_neuron.loc[selected_goterm], preds_svm_matrix.loc[selected_goterm])
print(cnf_matrix)
print("Accuracy:",metrics.accuracy_score(slim_matrix_single_neuron.loc[selected_goterm], preds_svm_matrix.loc[selected_goterm]))
print("Precision:",metrics.precision_score(slim_matrix_single_neuron.loc[selected_goterm], preds_svm_matrix.loc[selected_goterm]))
print("Recall:",metrics.recall_score(slim_matrix_single_neuron.loc[selected_goterm], preds_svm_matrix.loc[selected_goterm])) #TP / (TP+FN)
print("AUC with score:",auc) #TP / (TP+FN)[[203 9]
[ 4 14]]
Accuracy: 0.9434782608695652
Precision: 0.6086956521739131
Recall: 0.7777777777777778
AUC with score: 0.9095911949685536
import colorlover as cl
# Confusion matrix of annotations vs. y_predicted, drawn as a pie chart.
matrix = metrics.confusion_matrix(annotations, y_predicted)
tn, fp, fn, tp = matrix.ravel()
# Slice order must match label_text, labels and colors below.
values = [tp, fn, fp, tn]
label_text = ["True Positive", "False Negative", "False Positive", "True Negative"]
labels = ["<b>TP</b>", "<b>FN</b>", "<b>FP</b>", "<b>TN</b>"]
# colorlover palettes; not used by the figure below, which takes its colours
# from the explicit `colors` list
blue = cl.flipper()["seq"]["9"]["Blues"]
red = cl.flipper()["seq"]["9"]["Reds"]
colors = ["#ff3700", "#FFA0A0", "#CCE9FF", "#0b8bff"]
trace0 = go.Pie(
    labels=label_text,
    values=values,
    hoverinfo="label+value+percent",
    textinfo="text+value",
    text=labels,
    sort=False,  # keep the TP, FN, FP, TN order instead of sorting by size
    marker=dict(colors=colors),
    insidetextfont={"color": "#36382E"},
    rotation=90,
)
layout = go.Layout(
    title=dict(
        text="Confusion Matrix",
        x=0.3,
        y=0.8,
        font=dict(size=14),
        xanchor='center',
        yanchor='top',
    ),
    # margin=dict(l=50, r=50, t=100, b=10),
    legend=dict(font={"color": "#36382E"}, orientation="h", x=0.1, y=-0.03),
    # plot_bgcolor="#282b38",
    # paper_bgcolor="#282b38",
    font=dict(family='Roboto', color="#36382E"),
)
data = [trace0]
figure = go.Figure(data=data, layout=layout)
figure
y_test = annotations
decision_test=y_predicted
fpr, tpr, threshold = metrics.roc_curve(y_test, decision_test)
# AUC Score
auc_score = metrics.roc_auc_score(y_true=y_test, y_score=decision_test)
trace0 = go.Scatter(
x=fpr, y=tpr, mode="lines", name="Test Data", marker={"color": "#ff3700"}
)
layout = go.Layout(
title=dict(text=f"ROC Curve (AUC = {auc_score:.3f})",
x=0.6,
y=0.5,
font=dict(size=20)
),
xaxis=dict(title="False Positive Rate", gridcolor="white"),
yaxis=dict(title="True Positive Rate", gridcolor="white"),
legend=dict(x=0, y=1.05, orientation="h"),
margin=dict(l=100, r=10, t=25, b=40),
# plot_bgcolor="#282b38",
# paper_bgcolor="#282b38",
font=dict(family='Roboto',color= "#36382E"),
)
data = [trace0]
figure = go.Figure(data=data, layout=layout)
figurePlot SVM with unknown labels
Voronoi Tessellation
What is a Voronoi Tessellation? Given a set $P := \{p_1, \dots, p_n\}$ of sites, a Voronoi Tessellation is a subdivision of the space into $n$ cells, one for each site in $P$, with the property that a point $q$ lies in the cell corresponding to a site $p_i$ iff $d(p_i, q) < d(p_j, q)$ for every $j \neq i$. The segments in a Voronoi Tessellation correspond to all points in the plane equidistant to the two nearest sites. Voronoi Tessellations have applications in computer science.
https://stackoverflow.com/questions/61225052/svm-plot-decision-surface-when-working-with-more-than-2-features
tsne = TSNE(n_components=2, verbose=0,
init="pca",
perplexity=30,
random_state=123
)
z = tsne.fit_transform(score_mod.astype(float)) list_nodes = list(models_svm[selected_goterm].feature_names_in_) # Extract the feature names from the model (those are the attributions we need)
score_unknown = attribution_data_all.loc[list_nodes,unknown].T
score_unknown_mod = score_unknown.divide(score.std()).fillna(0) # normalizey_unknown = np.full(score_unknown_mod.shape[0],2) # 2=unknown MOA
y_pred_unknown = models_svm[selected_goterm].predict(score_unknown_mod.astype(float))# join scores and annotations from known and unknown drugs
all_score = pd.concat([score_mod,score_unknown_mod])
all_y = np.concatenate((annotations,y_unknown)) # 2=unknown MOAPlot T-SNE SVM
# Import from the public package path; `sklearn.neighbors._classification` is
# a private module whose location is not part of the supported API.
from sklearn.neighbors import KNeighborsClassifier
# https://github.com/plotly/dash-sample-apps/blob/main/apps/dash-svm/utils/dash_reusable_components.py
# Embed known and unknown drugs together in 2-D.
z = tsne.fit_transform(all_score.astype(float))
df = pd.DataFrame(
    {
        "y": all_y,
        "comp-1": z[:, 0],
        "comp-2": z[:, 1],
        "name": list(all_score.index),
    }
)
df = df.sort_values(by=['y'])
df["y"] = df["y"].astype(str)
X, y = all_score.astype(float), all_y
# SVM prediction for every drug, known and unknown.
y_predicted = models_svm[selected_goterm].predict(X)
resolution = 300  # resolution x resolution (300 x 300) background pixels
# Bounding box of the embedding, padded by 1 on every side.
X2d_xmin, X2d_xmax = np.min(z[:, 0]) - 1, np.max(z[:, 0]) + 1
X2d_ymin, X2d_ymax = np.min(z[:, 1]) - 1, np.max(z[:, 1]) + 1
xx, yy = np.meshgrid(
    np.linspace(X2d_xmin, X2d_xmax, resolution),
    np.linspace(X2d_ymin, X2d_ymax, resolution),
)
# approximate Voronoi tessellation on a resolution x resolution grid using
# 1-NN: each pixel takes the SVM prediction of its nearest embedded drug
background_model = KNeighborsClassifier(n_neighbors=1).fit(z, y_predicted)
voronoiBackground = background_model.predict(np.c_[xx.ravel(), yy.ravel()])
voronoiBackground = voronoiBackground.reshape((resolution, resolution))
# Name of the selected GO term without its last 4 characters (the output below
# shows the trailing " (1)" removed).
go_name = real_go_info[real_go_info["GO_term"] == selected_goterm + "_1"]["Name"].values[0][:-4]
go_name
'Cellular response to interleukin-4'
# Figure: t-SNE embedding of all drugs on top of the approximate SVM decision
# regions (voronoiBackground).
# Point colours by class: 0 -> blue, 1 -> red, 2 -> dark grey.
bright_cscale = [[0, "#0b8bff"], [0.5, "#ff3700"], [1, "#36382E"]]
# Background colours for the two predicted classes.
new_cscale = [[0, "#CCE9FF"], [1, "#FFA0A0"]]
# Flatten the grid once and reuse it for both contour traces.
grid_x = xx.flatten()
grid_y = yy.flatten()
grid_z = voronoiBackground.flatten()
# Filled background without contour lines.
trace0 = go.Contour(
    x=grid_x,
    y=grid_y,
    z=grid_z,
    hoverinfo="none",
    showscale=False,
    contours=dict(showlines=False),
    colorscale=new_cscale,
    opacity=0.9,
)
# Same surface drawn again with its contour lines in red.
trace1 = go.Contour(
    x=grid_x,
    y=grid_y,
    z=grid_z,
    showscale=False,
    hoverinfo="none",
    colorscale=new_cscale,
    line=dict(color="#ff3700"),
)
# One marker per drug. cmin/cmax pin the colour range to the class codes 0..2;
# otherwise plotly scales the range to the values present and the classes are
# miscoloured whenever class 0 or class 2 is missing.
trace2 = go.Scatter(
    x=df["comp-1"],
    y=df["comp-2"],
    mode="markers",
    text=df["name"].to_numpy(),
    marker=dict(
        size=7,
        color=df["y"].to_numpy(int),
        colorscale=bright_cscale,
        cmin=0,
        cmax=2,
    ),
    showlegend=False,
)
# Empty traces that only provide the legend entries for the three classes.
legend1 = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    name="Not annotated to<br>" + selected_goterm,
    marker=dict(size=7, color="#0b8bff", symbol='circle'),
)
legend2 = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    name="Drug annotated to<br>" + selected_goterm,
    marker=dict(size=7, color="#ff3700", symbol='circle'),
)
legend3 = go.Scatter(
    x=[None],
    y=[None],
    mode="markers",
    name="Unknown MOA<br>annotations",
    marker=dict(size=7, color="#36382E", symbol='circle'),
)
layout = go.Layout(
    title=dict(
        text="<b>" + selected_goterm + "</b> " + go_name,
        x=0.5,
        y=0.92,
        font=dict(size=18),
        xanchor='center',
        yanchor='top',
    ),
    xaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    yaxis=dict(ticks="", showticklabels=False, showgrid=False, zeroline=False),
    # Clip the axes to the background grid; the NumPy reductions avoid
    # iterating the 90 000-element arrays in Python.
    yaxis_range=[yy.min(), yy.max()],
    xaxis_range=[xx.min(), xx.max()],
    legend=dict(x=0, y=0, orientation="h", font=dict(size=14)),
    paper_bgcolor='rgba(0,0,0,0)',
    width=600,
    height=800,
    font=dict(family='Roboto', color="#36382E", size=15),
)
data = [trace0, trace1, trace2, legend2, legend1, legend3]
figure = go.Figure(data=data, layout=layout)
figure